Pooled amplicon example-standard DADA2 workflow

library(phyloseq); packageVersion("phyloseq")
#> [1] '1.46.0'
library(ggplot2); packageVersion("ggplot2")
#> [1] '3.5.1'
library(readr); packageVersion("readr")
#> [1] '2.1.5'
library(purrr); packageVersion("purrr")
#> [1] '1.0.2'
library(furrr); packageVersion("furrr")
#> Loading required package: future
#> Warning: package 'future' was built under R version 4.3.3
#> [1] '0.3.1'
library(dplyr); packageVersion("dplyr")
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
#> [1] '1.1.4'
library(stringr); packageVersion("stringr")
#> [1] '1.5.1'
library(metacoder); packageVersion("metacoder")
#> This is metacoder version 0.3.7 (stable)
#> 
#> Attaching package: 'metacoder'
#> The following object is masked from 'package:ggplot2':
#> 
#>     map_data
#> The following object is masked from 'package:phyloseq':
#> 
#>     filter_taxa
#> [1] '0.3.7'
library(data.table); packageVersion("data.table")
#> 
#> Attaching package: 'data.table'
#> The following objects are masked from 'package:dplyr':
#> 
#>     between, first, last
#> The following object is masked from 'package:purrr':
#> 
#>     transpose
#> [1] '1.15.4'
library(decontam); packageVersion("decontam")
#> [1] '1.22.0'
library(tidyr); packageVersion("tidyr")
#> [1] '1.3.1'
library(purrr); packageVersion("purrr")
#> [1] '1.0.2'
library(forcats); packageVersion("forcats")
#> [1] '1.0.0'
library(Biostrings); packageVersion("Biostrings")
#> Loading required package: BiocGenerics
#> 
#> Attaching package: 'BiocGenerics'
#> The following objects are masked from 'package:dplyr':
#> 
#>     combine, intersect, setdiff, union
#> The following objects are masked from 'package:stats':
#> 
#>     IQR, mad, sd, var, xtabs
#> The following objects are masked from 'package:base':
#> 
#>     anyDuplicated, aperm, append, as.data.frame, basename, cbind,
#>     colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
#>     get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
#>     match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
#>     Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
#>     table, tapply, union, unique, unsplit, which.max, which.min
#> Loading required package: S4Vectors
#> Loading required package: stats4
#> 
#> Attaching package: 'S4Vectors'
#> The following object is masked from 'package:tidyr':
#> 
#>     expand
#> The following objects are masked from 'package:data.table':
#> 
#>     first, second
#> The following objects are masked from 'package:dplyr':
#> 
#>     first, rename
#> The following object is masked from 'package:utils':
#> 
#>     findMatches
#> The following objects are masked from 'package:base':
#> 
#>     expand.grid, I, unname
#> Loading required package: IRanges
#> 
#> Attaching package: 'IRanges'
#> The following object is masked from 'package:data.table':
#> 
#>     shift
#> The following object is masked from 'package:metacoder':
#> 
#>     reverse
#> The following objects are masked from 'package:dplyr':
#> 
#>     collapse, desc, slice
#> The following object is masked from 'package:purrr':
#> 
#>     reduce
#> The following object is masked from 'package:phyloseq':
#> 
#>     distance
#> Loading required package: XVector
#> 
#> Attaching package: 'XVector'
#> The following object is masked from 'package:purrr':
#> 
#>     compact
#> Loading required package: GenomeInfoDb
#> Warning: package 'GenomeInfoDb' was built under R version 4.3.3
#> 
#> Attaching package: 'Biostrings'
#> The following object is masked from 'package:metacoder':
#> 
#>     complement
#> The following object is masked from 'package:base':
#> 
#>     strsplit
#> [1] '2.70.3'
library(magick); packageVersion("magick")
#> Warning: package 'magick' was built under R version 4.3.3
#> Linking to ImageMagick 6.9.12.93
#> Enabled features: cairo, fontconfig, freetype, heic, lcms, pango, raw, rsvg, webp
#> Disabled features: fftw, ghostscript, x11
#> [1] '2.8.4'
library(pdftools);packageVersion("pdftools")
#> Warning: package 'pdftools' was built under R version 4.3.3
#> Using poppler version 23.04.0
#> [1] '3.4.1'
library(vegan); packageVersion("vegan")
#> Warning: package 'vegan' was built under R version 4.3.3
#> Loading required package: permute
#> Loading required package: lattice
#> This is vegan 2.6-6.1
#> [1] '2.6.6.1'
library(grid); packageVersion("grid")
#> 
#> Attaching package: 'grid'
#> The following object is masked from 'package:Biostrings':
#> 
#>     pattern
#> [1] '4.3.2'
knitr::opts_knit$set(root.dir = "~/benchmark_demulticoder")

seed <- 1
set.seed(seed)

Demonstration of how to use demulticoder on a dataset that has pooled amplicons (RPS10 and ITS), and how generalized analysis would be done if you didn’t use demulticoder. A more nuanced analysis is shown in ‘pooled_amplicoin_analysis_demulticoder.Rmd’

Let’s also load files from the standard worfklows for its and dataset rps10 datasets, and combine. We’ll then make a combined matrix that is analogous to what was done with the demulticoder analysis



taxa_its<- read.table("standard_workflow/its/data/its_taxa.out", sep="\t", stringsAsFactors=F,header=T)

taxa_filt_its <- taxa_its %>%
  mutate(across(starts_with("tax."), ~ ifelse(get(str_replace(cur_column(), "tax.", "boot.")) < 0, NA, .))) %>%
  set_names(str_replace(names(.), "tax.", ""))

remove_prefix_its <- function(data) {
  data <- gsub("[a-z]__", "", data)  
  return(data)
}

taxa_filt_its[] <- lapply(taxa_filt_its, remove_prefix_its)

revise_species_its <- function(genus, species) {
  revise_species_its <- ifelse(is.na(species), NA, paste(genus, species, sep = "_"))
  return(revise_species_its)
}

taxa_filt_its$Species <- revise_species_its(taxa_filt_its$Genus, taxa_filt_its$Species)
taxa_its_df <- as.data.frame(taxa_filt_its)
taxa_its_df$sequence <- rownames(taxa_its_df)
taxa_its_df <- taxa_its_df[, c("sequence", setdiff(names(taxa_its_df), "sequence"))]
rownames(taxa_its_df) <- NULL

#Load ASV matrix now
seqtab_nochim_its <- read.table("standard_workflow/its/data/its_seqtab_nochim.out")
seqtab_nochim_its <- t(seqtab_nochim_its)
seqtab_nochim_its_df <- as.data.frame(seqtab_nochim_its)
seqtab_nochim_its_df$sequence <- rownames(seqtab_nochim_its_df)
seqtab_nochim_its_df <- seqtab_nochim_its_df[, c("sequence", setdiff(names(seqtab_nochim_its_df), "sequence"))]
rownames(seqtab_nochim_its_df) <- NULL


#Combine matrices
combined_df_its <- merge(seqtab_nochim_its_df, taxa_its_df, by = "sequence", all = TRUE)

taxonomic_cols <- c("Kingdom", "Phylum", "Class", "Order", 
                    "Family", "Genus", "Species")

bootstrap_cols <- c("boot.Kingdom", "boot.Phylum", "boot.Class", "boot.Order", 
                    "boot.Family", "boot.Genus", "boot.Species")

dada2_tax <- mapply(function(tax, boot, level) {
  paste(tax, boot, level, sep = "--")
}, combined_df_its[, taxonomic_cols], combined_df_its[, bootstrap_cols], 
   c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species"))

combined_df_its$dada2_tax <- apply(dada2_tax, 1, paste, collapse = ";")

#Remove all other tax and boot columns
combined_df_its <- combined_df_its[, !(names(combined_df_its) %in% c(taxonomic_cols, bootstrap_cols))]

# Reorder columns to put 'taxa_bootstrap_combined' after 'sequence'
combined_df_its <- combined_df_its[, c("sequence", "dada2_tax", setdiff(names(combined_df_its), c("sequence", "dada2_tax")))]

#Add domain-100-Eukaryota; prefix before Kingdom
combined_df_its$dada2_tax <- gsub("Fungi", "Eukaryota--100--Domain;Fungi", combined_df_its$dada2_tax)

#Load metadata
samdf_its<- read.csv("standard_workflow/its/data/metadata_its.csv")

#Save file
write.csv(combined_df_its, "standard_workflow/its/data/its_abundance_matrix_combined_dada2workflow.csv")

Track number of reads throughout the pipeline

track_reads_standard_wf_its <- read.table("standard_workflow/its/data/its_trackreads.out", sep="\t", stringsAsFactors = F, header=T)

Let’s also do the same for the rps10 dataset

taxa_rps10<- read.table("standard_workflow/rps10/data/rps10_taxa.out", sep="\t", stringsAsFactors=F,header=T)

taxa_filt_rps10 <- taxa_rps10 %>%
  mutate(across(starts_with("tax."), ~ ifelse(get(str_replace(cur_column(), "tax.", "boot.")) < 0, NA, .))) %>%
  set_names(str_replace(names(.), "tax.", ""))

remove_prefix_rps10 <- function(data) {
  data <- gsub("[a-z]__", "", data)  
  return(data)
}

taxa_filt_rps10[] <- lapply(taxa_filt_rps10, remove_prefix_rps10)

taxa_rps10_df <- as.data.frame(taxa_filt_rps10)
taxa_rps10_df$sequence <- rownames(taxa_rps10_df)
taxa_rps10_df <- taxa_rps10_df[, c("sequence", setdiff(names(taxa_rps10_df), "sequence"))]
rownames(taxa_rps10_df) <- NULL

#Load ASV matrix now
seqtab_nochim_rps10 <- read.table("standard_workflow/rps10/data/rps10_seqtab_nochim.out")
rownames(seqtab_nochim_rps10) <- gsub("__", "_", rownames(seqtab_nochim_rps10))

seqtab_nochim_rps10 <- t(seqtab_nochim_rps10)
seqtab_nochim_rps10_df <- as.data.frame(seqtab_nochim_rps10)
seqtab_nochim_rps10_df$sequence <- rownames(seqtab_nochim_rps10_df)
seqtab_nochim_rps10_df <- seqtab_nochim_rps10_df[, c("sequence", setdiff(names(seqtab_nochim_rps10_df), "sequence"))]
rownames(seqtab_nochim_rps10_df) <- NULL

#Combine matrices
combined_df_rps10 <- merge(seqtab_nochim_rps10_df, taxa_rps10_df, by = "sequence", all = TRUE)

taxonomic_cols <- c("Kingdom", "Phylum", "Class", "Order", 
                    "Family", "Genus", "Species")

bootstrap_cols <- c("boot.Kingdom", "boot.Phylum", "boot.Class", "boot.Order", 
                    "boot.Family", "boot.Genus", "boot.Species")

dada2_tax <- mapply(function(tax, boot, level) {
  paste(tax, boot, level, sep = "--")
}, combined_df_rps10[, taxonomic_cols], combined_df_rps10[, bootstrap_cols], 
   c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species"))

combined_df_rps10$dada2_tax <- apply(dada2_tax, 1, paste, collapse = ";")

#Remove all other tax and boot columns
combined_df_rps10 <- combined_df_rps10[, !(names(combined_df_rps10) %in% c(taxonomic_cols, bootstrap_cols))]

# Reorder columns to put 'taxa_bootstrap_combined' after 'sequence'
combined_df_rps10 <- combined_df_rps10[, c("sequence", "dada2_tax", setdiff(names(combined_df_rps10), c("sequence", "dada2_tax")))]

#Add domain-100-Eukaryota; prefix before Kingdom
combined_df_rps10$dada2_tax <- gsub("Stramenopila", "Eukaryota--100--Domain;Stramenopila", combined_df_rps10$dada2_tax)

#Load metadata
samdf_rps10<- read.csv("standard_workflow/rps10/data/metadata_rps10.csv")

#Save file
write.csv(combined_df_rps10, "standard_workflow/rps10/data/rps10_abundance_matrix_combined_dada2workflow.csv")

#Load DADA2 metadata file
samdf_rps10<- read.csv("standard_workflow/rps10/data/metadata_rps10.csv")

Track number of reads throughout the pipeline

track_reads_standard_wf_rps10 <- read.table("standard_workflow/rps10/data/rps10_trackreads.out", sep="\t", stringsAsFactors = F, header=T)

Finally Let’s first combine rps10 and its data into fully combined matrices

sample_cols_its <- setdiff(names(combined_df_its), c("sequence", "dada2_tax"))
sample_cols_rps10 <- setdiff(names(combined_df_rps10), c("sequence", "dada2_tax"))

if(!all(sample_cols_its == sample_cols_rps10)) {
  stop("Sample columns do not match between ITS and RPS10 dataframes!")
}

abundance <- rbind(
  combined_df_its[, c("sequence", "dada2_tax", sample_cols_its)],  # ITS data
  combined_df_rps10[, c("sequence", "dada2_tax", sample_cols_rps10)]  # RPS10 data
)

write.csv(abundance, "standard_workflow/combined_data/final_combined_abundance_matrix_standard_workflow.csv")

#Let's also load metadata file-consistent with demulticoder analysis
metadata_path <- file.path("standard_workflow/combined_data/metadata.csv")
metadata <- read_csv(metadata_path)
#> Rows: 173 Columns: 9
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: ","
#> chr (4): sample_name, well, organism, sample_type
#> dbl (3): plate, path_conc, experiment
#> lgl (2): flooded, is_ambiguous
#> 
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
print(metadata)
#> # A tibble: 173 × 9
#>    sample_name plate well  organism flooded path_conc experiment sample_type
#>    <chr>       <dbl> <chr> <chr>    <lgl>       <dbl>      <dbl> <chr>      
#>  1 S1              1 A01   Cry      TRUE          100          1 Sample     
#>  2 S10             1 B02   Cry      TRUE            1          1 Sample     
#>  3 S100            2 B02   Cin      TRUE            1          2 Sample     
#>  4 S101            2 C02   Plu      FALSE           1          2 Sample     
#>  5 S102            2 D02   Cry      TRUE            1          2 Sample     
#>  6 S103            2 F02   Cin      FALSE           1          2 Sample     
#>  7 S104            2 G02   Plu      FALSE           1          2 Sample     
#>  8 S105            2 H02   Cry      TRUE          100          2 Sample     
#>  9 S106            2 A03   Plu      FALSE         100          2 Sample     
#> 10 S107            2 B03   Cin      TRUE          100          2 Sample     
#> # ℹ 163 more rows
#> # ℹ 1 more variable: is_ambiguous <lgl>

Now, let’s proceed with our analogous analysis to demulticoder analysis

Let’s collect some summary stats for each dataset

its_summary_reads<-summary(track_reads_standard_wf_its)
print(its_summary_reads)
#>      input           filtered       denoisedF       denoisedR    
#>  Min.   :   110   Min.   :    5   Min.   :    1   Min.   :    1  
#>  1st Qu.: 23642   1st Qu.:12765   1st Qu.:12472   1st Qu.:12342  
#>  Median : 37530   Median :22939   Median :22628   Median :22511  
#>  Mean   : 43474   Mean   :25532   Mean   :25285   Mean   :25155  
#>  3rd Qu.: 58507   3rd Qu.:31793   3rd Qu.:31567   3rd Qu.:31446  
#>  Max.   :137025   Max.   :87247   Max.   :87073   Max.   :86985  
#>      merged         nonchim     
#>  Min.   :    0   Min.   :    0  
#>  1st Qu.:11864   1st Qu.:11864  
#>  Median :21767   Median :21767  
#>  Mean   :24398   Mean   :24307  
#>  3rd Qu.:30396   3rd Qu.:29957  
#>  Max.   :86697   Max.   :86685

rps10_summary_reads<-summary(track_reads_standard_wf_rps10)
print(rps10_summary_reads)
#>      input           filtered        denoisedF        denoisedR     
#>  Min.   :    94   Min.   :    26   Min.   :     7   Min.   :     6  
#>  1st Qu.: 38305   1st Qu.: 29337   1st Qu.: 29321   1st Qu.: 29314  
#>  Median : 61388   Median : 48530   Median : 48503   Median : 48497  
#>  Mean   : 74279   Mean   : 56779   Mean   : 56715   Mean   : 56712  
#>  3rd Qu.: 95914   3rd Qu.: 75033   3rd Qu.: 74536   3rd Qu.: 74530  
#>  Max.   :264287   Max.   :219173   Max.   :218786   Max.   :219079  
#>      merged          nonchim      
#>  Min.   :     0   Min.   :     0  
#>  1st Qu.: 29277   1st Qu.: 29277  
#>  Median : 48063   Median : 48063  
#>  Mean   : 56307   Mean   : 55623  
#>  3rd Qu.: 73544   3rd Qu.: 73431  
#>  Max.   :218307   Max.   :217160

Let’s now retrieve stats but only on well supported taxa

# Function to extract taxonomic levels and count unique entries with bootstrap filtering
# Function to extract taxonomic levels and count unique entries
analyze_taxonomy <- function(data) {
  # Get taxonomy data
  tax_data <- data$dada2_tax
  
  # Split taxonomy string into components
  tax_splits <- strsplit(tax_data, ";")
  
  # Function to safely extract taxonomic level with proper rank checking
  extract_tax_level <- function(tax_array, rank) {
    # Find the position containing the rank
    rank_pos <- grep(rank, tax_array)
    if(length(rank_pos) > 0) {
      # Extract and clean the taxonomy name
      tax <- tax_array[rank_pos]
      # Remove the rank pattern and any leading/trailing whitespace
      cleaned_tax <- trimws(gsub(paste0("--\\d+--", rank), "", tax))
      return(cleaned_tax)
    }
    return(NA)
  }
  
  # Extract each taxonomic level with proper hierarchy checking
  families <- sapply(tax_splits, function(x) extract_tax_level(x, "Family"))
  genera <- sapply(tax_splits, function(x) extract_tax_level(x, "Genus"))
  species <- sapply(tax_splits, function(x) extract_tax_level(x, "Species"))
  
  # Create a data frame to check hierarchy consistency
  tax_df <- data.frame(
    Family = families,
    Genus = genera,
    Species = species,
    stringsAsFactors = FALSE
  )
  
  # Remove NA values before counting
  families <- families[!is.na(families)]
  genera <- genera[!is.na(genera)]
  species <- species[!is.na(species)]
  
  # Count unique entries
  unique_counts <- list(
    families = length(unique(families)),
    genera = length(unique(genera)),
    species = length(unique(species))
  )
  
  # Get prevalence with hierarchy checking
  family_prev <- table(families)
  genus_prev <- table(genera)
  species_prev <- table(species)
  
  # Sort prevalence tables
  family_prev <- sort(family_prev, decreasing = TRUE)
  genus_prev <- sort(genus_prev, decreasing = TRUE)
  species_prev <- sort(species_prev, decreasing = TRUE)
  
  # Print top 5 of each level for verification
  cat("\nTop 5 most prevalent Families:\n")
  print(head(family_prev, 5))
  cat("\nTop 5 most prevalent Genera:\n")
  print(head(genus_prev, 5))
  cat("\nTop 5 most prevalent Species:\n")
  print(head(species_prev, 5))
  
  # Get most prevalent taxa with hierarchy verification
  most_prevalent <- list(
    family = if(length(family_prev) > 0) names(family_prev)[1] else "None found",
    genus = if(length(genus_prev) > 0) names(genus_prev)[1] else "None found",
    species = if(length(species_prev) > 0) names(species_prev)[1] else "None found"
  )
  
  return(list(
    unique_counts = unique_counts,
    most_prevalent = most_prevalent,
    tax_df = tax_df  # Return full taxonomy dataframe for inspection
  ))
}

# Run analysis for both datasets
if(exists("combined_df_rps10")) {
  rps10_results <- analyze_taxonomy(combined_df_rps10)
}
#> 
#> Top 5 most prevalent Families:
#> families
#>      Pythiaceae Peronosporaceae Saprolegniaceae   Lagenidiaceae Saplolegniaceae 
#>             171             126              20              11               9 
#> 
#> Top 5 most prevalent Genera:
#> genera
#>      Pythium Phytophthora  Peronospora  Aphanomyces  Saprolegnia 
#>          152           96           16           15           11 
#> 
#> Top 5 most prevalent Species:
#> species
#> Phytophthora_sp.kununurra Phytophthora_citrophthora        Pythium_dissotocum 
#>                        24                        19                        19 
#>          Pythium_lutarium        Phytophthora_sojae 
#>                        14                        13

if(exists("combined_df_its")) {
  its_results <- analyze_taxonomy(combined_df_its)
}
#> 
#> Top 5 most prevalent Families:
#> families
#>         Fungi_fam_Incertae_sedis                  Serendipitaceae 
#>                              356                              201 
#> Rozellomycota_fam_Incertae_sedis                  Hyaloscyphaceae 
#>                              176                              157 
#>                       Tuberaceae 
#>                              128 
#> 
#> Top 5 most prevalent Genera:
#> genera
#>         Fungi_gen_Incertae_sedis                      Serendipita 
#>                              356                              197 
#> Rozellomycota_gen_Incertae_sedis                            Tuber 
#>                              176                              128 
#>                      Hyaloscypha 
#>                               94 
#> 
#> Top 5 most prevalent Species:
#> species
#>                         NA        Tuber_pseudobrumale 
#>                       1315                        128 
#>            Pezoloma_ericae     Hyaloscypha_finlandica 
#>                         31                         20 
#> Clathrosphaerina_zalewskii 
#>                         19

Let’s now use combined CSV files to make a few more plots-this shows how we can easily convert metadata and combined ASV matrix into a another taxmap object

First we will configure the combined taxmap object

#load reference database
rps10_database <- read_fasta("demulticoder/data/rps10_reference_db.fa")
innoc_species <- c(Cin = "Phytophthora_cinnamomi",  Plu = "Phytophthora_plurivora", Cry = "Pythium_cryptoirregulare")
innoc_regex <- paste0('(', paste0(innoc_species, collapse = '|'), ')')
innoc_seqs <- rps10_database[grepl(names(rps10_database), pattern = innoc_regex)]
innoc_seqs <- innoc_seqs[! duplicated(innoc_seqs)]
names(innoc_seqs) <- str_match(names(innoc_seqs), pattern = innoc_regex)[, 2]

iupac_match <- function(asv_chars, ref_chars) {
  map2_lgl(asv_chars, ref_chars, function(asv, ref) grepl(asv, pattern = paste0('[', IUPAC_CODE_MAP[ref], ']+')))
}

# Count number of mismatches in an alignment, allowing for IUPAC codes in reference
align_mismatch <- function(alignment) {
  asv_chars <- strsplit(as.character(alignment@pattern), '')[[1]]
  ref_chars <- strsplit(as.character(alignment@subject), '')[[1]]
  sum(! iupac_match(asv_chars, ref_chars))
}

# Align each sequence to each asv and make table of results"
aligned_data_path <- file.path("standard_workflow/combined_data", "infection_aligned_data.rds")

if (file.exists(aligned_data_path)) {
  aligned_data <- readRDS(aligned_data_path)
} else {
  aligned_data <-  future_map_dfr(seq_along(innoc_seqs), function(i) {
    aligned <- lapply(abundance$sequence, function(asv) pairwiseAlignment(pattern = asv, subject = innoc_seqs[i], type = 'global-local'))
    print(paste("Processing species:", names(innoc_seqs)[i]))
    tibble(
      species = names(innoc_seqs)[i],
      align_len = map_dbl(aligned, nchar),
      mismatch = map_dbl(aligned, align_mismatch),
      pid = (align_len - mismatch) / align_len,
      asv_seq = abundance$sequence, 
      ref_seq = innoc_seqs[i],
      alignment = aligned
    )
  })
  saveRDS(aligned_data, file = aligned_data_path)
}

# Convert ASV abundances to proportions for use with matches
abundance_prop <- abundance
abundance_prop[metadata$sample_name] <- lapply(abundance_prop[metadata$sample_name], function(counts) counts/sum(counts))

innoculum_asv_mismatch_threshold <-1
# Sum the read counts of ASVs representing the same species used for inoculation
infection_data <- aligned_data %>%
  filter(mismatch <= innoculum_asv_mismatch_threshold) %>%
  left_join(abundance_prop, by = c(asv_seq = "sequence")) %>%
  group_by(asv_seq) %>% slice_min(mismatch, with_ties = FALSE) # Only consider the best match per ASV

Prepare trimmed tables for subsequent analysis

metadata <-samdf_its
## Update abundance matrix and metadata with results
inoculum_asv_key <- setNames(infection_data$species, infection_data$asv_seq)
abundance <- mutate(abundance, is_inoculum = sequence %in% infection_data$asv_seq, inoculum = inoculum_asv_key[sequence], .after = "dada2_tax")
write_csv(abundance, file.path('standard_workflow/combined_data', 'abundance_with_infection_data.csv'))

#Let's append metadata table with more info
species_name_key <- c(Phytophthora_cinnamomi = "cin_prop", Phytophthora_plurivora = "plu_prop", Pythium_cryptoirregulare = "cry_prop")
sample_infection_data <- infection_data %>%
  group_by(species) %>% summarise_at(metadata$sample_name, sum) %>% # Sum per inoculum species
  gather(key = "sample_name", value = "read_prop", !!! metadata$sample_name) %>%
  mutate(species = species_name_key[species]) %>%
  mutate(read_prop = ifelse(is.nan(read_prop), 0, read_prop)) %>%
  tidyr::spread(key = 'species', value = 'read_prop')
metadata <- metadata %>%
  select(sample_name, plate, well, organism, flooded, path_conc, experiment, sample_type, is_ambiguous) %>% # Avoid duplicating columns from previous runs
  left_join(sample_infection_data, by = "sample_name")

# Get expected inoculum proportion and non-target proportion
metadata$expected_innoc_prop <- map_dbl(1:nrow(metadata), function(i) {
  if (metadata$sample_type[i] == "Mock community") {
    return(NA)
  } else if (metadata$sample_type[i] == "Negative control" || metadata$organism[i] == "Control") {
    return(NA)
  } else {
    prop_col <- paste0(tolower(metadata$organism[i]), '_prop')
    return(metadata[[i, prop_col]])
  }
})
metadata$unexpected_innoc_prop <- map_dbl(1:nrow(metadata), function(i) {
  prop_cols <- c("cin_prop", "cry_prop", "plu_prop")
  if (metadata$sample_type[i] == "Mock community"  || metadata$sample_type[i] == "Negative control" || metadata$organism[i] == "Control") {
    return(sum(metadata[i, prop_cols]))
  } else {
    expected_prop_col <- paste0(tolower(metadata$organism[i]), '_prop')
    prop_cols <- prop_cols[! prop_cols %in% expected_prop_col]
    return(sum(metadata[i, prop_cols]))
  }
})

# Check if expected inoculum was found
metadata$expected_innoc <- map_lgl(1:nrow(metadata), function(i) {
  all_innoc <- sum(metadata[i, c('cin_prop', 'cry_prop', 'plu_prop')])
  if (metadata$sample_type[i] == "Mock community") {
    return(NA)
  } else if (metadata$sample_type[i] == "Negative control" || metadata$organism[i] == "Control") {
    return(all_innoc == 0)
  } else {
    prop_col <- paste0(tolower(metadata$organism[i]), '_prop')
    return(metadata[[i, prop_col]] > 0)
  }
})

# Check if ONLY expected inoculum was found
metadata$only_expected_innoc <- map_lgl(1:nrow(metadata), function(i) {
  all_innoc <- sum(metadata[i, c('cin_prop', 'cry_prop', 'plu_prop')])
  if (metadata$sample_type[i] == "Mock community") {
    return(NA)
  } else if (metadata$sample_type[i] == "Negative control" || metadata$organism[i] == "Control") {
    return(all_innoc == 0)
  } else {
    prop_col <- paste0(tolower(metadata$organism[i]), '_prop')
    return(metadata[[i, prop_col]] > 0 && metadata[[i, prop_col]] == all_innoc)
  }
})

# Look at number of samples with expected inoculum
table(metadata$expected_innoc)
#> 
#> FALSE  TRUE 
#>    88    81
table(metadata$only_expected_innoc)
#> 
#> FALSE  TRUE 
#>   107    62

metadata$valid_inoc <- (is.na(metadata$expected_innoc_prop) | metadata$expected_innoc_prop > 0.0001) & metadata$unexpected_innoc_prop < 0.0001
map_dbl(split(metadata$valid_inoc, metadata$organism), sum)
#>     Cin Control     Cry     Plu 
#>      26       9      16       8

# Save new metadata
write_csv(metadata, file.path('standard_workflow/combined_data', 'metadata_with_infection_data.csv'))

For the sample data, I will add columns for the proportion of reads in each sample representing each inoculum as well as a columns that indicate whether the expected inoculum was found, and if so, if it was the only inoculum found.

# Get per-sample proportions of reads
species_name_key <- c(Phytophthora_cinnamomi = "cin_prop", Phytophthora_plurivora = "plu_prop", Pythium_cryptoirregulare = "cry_prop")
sample_infection_data <- infection_data %>%
  group_by(species) %>% summarise_at(metadata$sample_name, sum) %>% # Sum per inoculum species
  tidyr::gather(key = "sample_name", value = "read_prop", !!! metadata$sample_name) %>%
  mutate(species = species_name_key[species]) %>%
  mutate(read_prop = ifelse(is.nan(read_prop), 0, read_prop)) %>%
  tidyr::spread(key = 'species', value = 'read_prop')
metadata <- metadata %>%
  select(sample_name, plate, well, organism, flooded, path_conc, experiment, sample_type, is_ambiguous) %>% # Avoid duplicating columns from previous runs
  left_join(sample_infection_data, by = "sample_name")

# Get expected inoculum proportion and non-target proportion
metadata$expected_innoc_prop <- map_dbl(1:nrow(metadata), function(i) {
  if (metadata$sample_type[i] == "Mock community") {
    return(NA)
  } else if (metadata$sample_type[i] == "Negative control" || metadata$organism[i] == "Control") {
    return(NA)
  } else {
    prop_col <- paste0(tolower(metadata$organism[i]), '_prop')
    return(metadata[[i, prop_col]])
  }
})
metadata$unexpected_innoc_prop <- map_dbl(1:nrow(metadata), function(i) {
  prop_cols <- c("cin_prop", "cry_prop", "plu_prop")
  if (metadata$sample_type[i] == "Mock community"  || metadata$sample_type[i] == "Negative control" || metadata$organism[i] == "Control") {
    return(sum(metadata[i, prop_cols]))
  } else {
    expected_prop_col <- paste0(tolower(metadata$organism[i]), '_prop')
    prop_cols <- prop_cols[! prop_cols %in% expected_prop_col]
    return(sum(metadata[i, prop_cols]))
  }
})

# Check if expected inoculum was found
metadata$expected_innoc <- map_lgl(1:nrow(metadata), function(i) {
  all_innoc <- sum(metadata[i, c('cin_prop', 'cry_prop', 'plu_prop')])
  if (metadata$sample_type[i] == "Mock community") {
    return(NA)
  } else if (metadata$sample_type[i] == "Negative control" || metadata$organism[i] == "Control") {
    return(all_innoc == 0)
  } else {
    prop_col <- paste0(tolower(metadata$organism[i]), '_prop')
    return(metadata[[i, prop_col]] > 0)
  }
})

# Check if ONLY expected inoculum was found
metadata$only_expected_innoc <- map_lgl(1:nrow(metadata), function(i) {
  all_innoc <- sum(metadata[i, c('cin_prop', 'cry_prop', 'plu_prop')])
  if (metadata$sample_type[i] == "Mock community") {
    return(NA)
  } else if (metadata$sample_type[i] == "Negative control" || metadata$organism[i] == "Control") {
    return(all_innoc == 0)
  } else {
    prop_col <- paste0(tolower(metadata$organism[i]), '_prop')
    return(metadata[[i, prop_col]] > 0 && metadata[[i, prop_col]] == all_innoc)
  }
})

# Look at number of samples with expected inoculum
table(metadata$expected_innoc)
#> 
#> FALSE  TRUE 
#>    88    81
table(metadata$only_expected_innoc)
#> 
#> FALSE  TRUE 
#>   107    62

metadata$valid_inoc <- (is.na(metadata$expected_innoc_prop) | metadata$expected_innoc_prop > 0.0001) & metadata$unexpected_innoc_prop < 0.0001
map_dbl(split(metadata$valid_inoc, metadata$organism), sum)
#>     Cin Control     Cry     Plu 
#>      26       9      16       8

# Save new metadata
write_csv(metadata, file.path('standard_workflow/combined_data', 'metadata_with_infection_data.csv'))

Let’s convert our new matrix and associated metadata table to a taxmap object

metadata <- filter(metadata, ! is_ambiguous)
abundance <- filter(abundance, ! is_inoculum)

#Filter out mock community samples and also any mock community samples as well
metadata <- filter(metadata, sample_type != "Mock community" & sample_type != "Negative control")

obj <- parse_tax_data(abundance, class_cols = 'dada2_tax', class_sep = ';',
                      class_regex = '^(.+)--(.+)--(.+)$',
                      class_key = c(taxon = 'taxon_name', boot = 'info', rank = 'taxon_rank'))
names(obj$data) <- c('abund', 'score')
obj <- transmute_obs(obj, 'score', sequence = sequence[input_index], boot = boot, rank = rank)


# For diversity calculations, we'll use proportions of read depth
# We'll set any proportion to 0 that is less than the inverse of the read count of the non-control sample with the fewest reads.
# This should account for unequal sample read depth without the randomness of rarefaction.

metadata$raw_count <- colSums(obj$data$abund[, metadata$sample_name])
lowest_count <- min(metadata$raw_count[! is.na(metadata$organism)])
lowest_count
#> [1] 12661
obj$data$prop <- calc_obs_props(obj, data = 'abund', cols = metadata$sample_name)
#> Calculating proportions from counts for 162 columns for 2697 observations.
obj$data$prop <- zero_low_counts(obj, data = 'prop', min_count = 1 / lowest_count, cols = metadata$sample_name)
#> Zeroing 2180 of 436914 counts less than 7.89827027880894e-05.
obj$data$prop
#> # A tibble: 2,697 × 163
#>    taxon_id       S1     S10     S100     S101  S102     S103     S104     S105
#>    <chr>       <dbl>   <dbl>    <dbl>    <dbl> <dbl>    <dbl>    <dbl>    <dbl>
#>  1 bfz      0        0       0        0            0 0        0        0       
#>  2 bga      0        0       0        0            0 0        0        0       
#>  3 bgb      0        0       0        0            0 0        0        0       
#>  4 bgc      0        0       0        0            0 0        0        0       
#>  5 bgc      0        0       0        0            0 0        0.00687  0       
#>  6 bgd      0        0       0        0.000587     0 0        0        0.000555
#>  7 bge      0.000370 0.00128 0.000180 0.000681     0 0        0        0       
#>  8 bgd      0        0       0        0            0 0.000911 0.000634 0       
#>  9 bgf      0        0       0        0            0 0        0        0       
#> 10 bgf      0        0       0        0            0 0        0        0       
#> # ℹ 2,687 more rows
#> # ℹ 154 more variables: S106 <dbl>, S107 <dbl>, S108 <dbl>, S109 <dbl>,
#> #   S11 <dbl>, S110 <dbl>, S111 <dbl>, S112 <dbl>, S113 <dbl>, S114 <dbl>,
#> #   S115 <dbl>, S116 <dbl>, S117 <dbl>, S118 <dbl>, S119 <dbl>, S12 <dbl>,
#> #   S120 <dbl>, S121 <dbl>, S122 <dbl>, S123 <dbl>, S124 <dbl>, S125 <dbl>,
#> #   S126 <dbl>, S127 <dbl>, S128 <dbl>, S129 <dbl>, S13 <dbl>, S130 <dbl>,
#> #   S131 <dbl>, S132 <dbl>, S133 <dbl>, S134 <dbl>, S135 <dbl>, S136 <dbl>, …

obj$data$prop[metadata$sample_name] <- map(metadata$sample_name, function(id) {
  out <- obj$data$prop[[id]]
  out[is.na(out) | is.nan(out)] <- 0
  out
})

obj$data$prop
#> # A tibble: 2,697 × 163
#>    taxon_id       S1     S10     S100     S101  S102     S103     S104     S105
#>    <chr>       <dbl>   <dbl>    <dbl>    <dbl> <dbl>    <dbl>    <dbl>    <dbl>
#>  1 bfz      0        0       0        0            0 0        0        0       
#>  2 bga      0        0       0        0            0 0        0        0       
#>  3 bgb      0        0       0        0            0 0        0        0       
#>  4 bgc      0        0       0        0            0 0        0        0       
#>  5 bgc      0        0       0        0            0 0        0.00687  0       
#>  6 bgd      0        0       0        0.000587     0 0        0        0.000555
#>  7 bge      0.000370 0.00128 0.000180 0.000681     0 0        0        0       
#>  8 bgd      0        0       0        0            0 0.000911 0.000634 0       
#>  9 bgf      0        0       0        0            0 0        0        0       
#> 10 bgf      0        0       0        0            0 0        0        0       
#> # ℹ 2,687 more rows
#> # ℹ 154 more variables: S106 <dbl>, S107 <dbl>, S108 <dbl>, S109 <dbl>,
#> #   S11 <dbl>, S110 <dbl>, S111 <dbl>, S112 <dbl>, S113 <dbl>, S114 <dbl>,
#> #   S115 <dbl>, S116 <dbl>, S117 <dbl>, S118 <dbl>, S119 <dbl>, S12 <dbl>,
#> #   S120 <dbl>, S121 <dbl>, S122 <dbl>, S123 <dbl>, S124 <dbl>, S125 <dbl>,
#> #   S126 <dbl>, S127 <dbl>, S128 <dbl>, S129 <dbl>, S13 <dbl>, S130 <dbl>,
#> #   S131 <dbl>, S132 <dbl>, S133 <dbl>, S134 <dbl>, S135 <dbl>, S136 <dbl>, …

#Let's modify the metadata sheet
metadata <- metadata %>%
  mutate(organism = fct_relevel(ordered(organism), "Control", "Cin", "Plu", "Cry"))

Alpha diversity

abund_table <- obj$data$abund[metadata$sample_name]
metadata$richness <- vegan::specnumber(abund_table, MARGIN = 2)
metadata$shannon <- vegan::diversity(abund_table, MARGIN = 2, index = "shannon")
metadata$invsimpson <- vegan::diversity(abund_table, MARGIN = 2, index = "invsimpson")
write.csv(metadata, file.path("standard_workflow", "combined_results/alpha_diversity.csv"))
print(metadata)
#>     sample_name plate well organism flooded path_conc experiment sample_type
#> 1            S1     1  A01      Cry    TRUE       100          1      Sample
#> 2           S10     1  B02      Cry    TRUE         1          1      Sample
#> 3          S100     2  B02      Cin    TRUE         1          2      Sample
#> 4          S101     2  C02      Plu   FALSE         1          2      Sample
#> 5          S102     2  D02      Cry    TRUE         1          2      Sample
#> 6          S103     2  F02      Cin   FALSE         1          2      Sample
#> 7          S104     2  G02      Plu   FALSE         1          2      Sample
#> 8          S105     2  H02      Cry    TRUE       100          2      Sample
#> 9          S106     2  A03      Plu   FALSE       100          2      Sample
#> 10         S107     2  B03      Cin    TRUE       100          2      Sample
#> 11         S108     2  C03  Control    TRUE         0          2      Sample
#> 12         S109     2  D03      Cin    TRUE         1          2      Sample
#> 13          S11     1  C02      Plu   FALSE         1          1      Sample
#> 14         S110     2  E03      Cry    TRUE       100          2      Sample
#> 15         S111     2  F03      Cin   FALSE       100          2      Sample
#> 16         S112     2  G03  Control    TRUE         0          2      Sample
#> 17         S113     2  H03      Cry   FALSE         1          2      Sample
#> 18         S114     2  A04      Plu   FALSE       100          2      Sample
#> 19         S115     2  B04  Control   FALSE         0          2      Sample
#> 20         S116     2  D04      Plu    TRUE       100          2      Sample
#> 21         S117     2  E04      Cry    TRUE         1          2      Sample
#> 22         S118     2  F04      Cry   FALSE       100          2      Sample
#> 23         S119     2  G04  Control    TRUE         0          2      Sample
#> 24          S12     1  D02      Plu    TRUE         1          1      Sample
#> 25         S120     2  H04      Cry   FALSE         1          2      Sample
#> 26         S121     2  A05      Cin    TRUE       100          2      Sample
#> 27         S122     2  B05      Cry   FALSE       100          2      Sample
#> 28         S123     2  C05      Plu    TRUE         1          2      Sample
#> 29         S124     2  D05      Cry    TRUE         1          2      Sample
#> 30         S125     2  E05      Plu   FALSE         1          2      Sample
#> 31         S126     2  F05      Plu    TRUE         1          2      Sample
#> 32         S127     2  G05      Plu   FALSE       100          2      Sample
#> 33         S128     2  H05      Plu    TRUE       100          2      Sample
#> 34         S129     2  A06  Control    TRUE         0          2      Sample
#> 35          S13     1  E02  Control    TRUE         0          1      Sample
#> 36         S130     2  B06      Cin    TRUE       100          2      Sample
#> 37         S131     2  C06      Cin    TRUE         1          2      Sample
#> 38         S132     2  D06      Cry    TRUE         1          2      Sample
#> 39         S133     2  E06      Cin   FALSE         1          2      Sample
#> 40         S134     2  F06      Cin   FALSE       100          2      Sample
#> 41         S135     2  G06      Cry   FALSE       100          2      Sample
#> 42         S136     2  H06      Plu    TRUE         1          2      Sample
#> 43         S137     2  B07      Cry    TRUE       100          2      Sample
#> 44         S138     2  C07      Plu    TRUE       100          2      Sample
#> 45         S139     2  D07      Cin    TRUE       100          2      Sample
#> 46          S14     1  F02      Cin    TRUE         1          1      Sample
#> 47         S140     2  E07      Plu   FALSE         1          2      Sample
#> 48         S141     2  F07      Cry    TRUE       100          2      Sample
#> 49         S142     2  G07  Control   FALSE         0          2      Sample
#> 50         S143     2  H07      Cry   FALSE         1          2      Sample
#> 51         S144     2  A08  Control   FALSE         0          2      Sample
#> 52         S145     2  B08      Plu   FALSE         1          2      Sample
#> 53         S146     2  C08      Cin   FALSE       100          2      Sample
#> 54         S147     2  D08      Cry   FALSE       100          2      Sample
#> 55         S148     2  E08      Cry   FALSE         1          2      Sample
#> 56         S149     2  F08      Plu   FALSE       100          2      Sample
#> 57          S15     1  H02      Cin   FALSE       100          1      Sample
#> 58         S150     2  G08  Control    TRUE         0          2      Sample
#> 59         S151     2  H08      Cin    TRUE         1          2      Sample
#> 60         S152     2  A09      Cry    TRUE         1          2      Sample
#> 61         S153     2  B09      Plu    TRUE         1          2      Sample
#> 62         S154     2  C09      Cin   FALSE       100          2      Sample
#> 63         S156     2  D09      Cin   FALSE         1          2      Sample
#> 64         S157     2  E09      Cry   FALSE         1          2      Sample
#> 65         S158     2  F09      Cry    TRUE         1          2      Sample
#> 66         S159     2  H09      Plu   FALSE       100          2      Sample
#> 67          S16     1  A03      Cry   FALSE       100          1      Sample
#> 68         S160     2  A10  Control   FALSE         0          2      Sample
#> 69         S161     2  B10      Cry   FALSE       100          2      Sample
#> 70         S162     2  C10      Cin   FALSE         1          2      Sample
#> 71         S163     2  D10      Plu    TRUE       100          2      Sample
#> 72         S164     2  E10      Plu    TRUE         1          2      Sample
#> 73         S165     2  F10      Cin   FALSE         1          2      Sample
#> 74         S166     2  G10      Plu   FALSE       100          2      Sample
#> 75         S167     2  H10      Plu    TRUE       100          2      Sample
#> 76         S168     2  A11      Cin    TRUE         1          2      Sample
#> 77         S169     2  B11      Cin   FALSE       100          2      Sample
#> 78          S17     1  B03      Cry    TRUE       100          1      Sample
#> 79          S18     1  C03  Control    TRUE         0          1      Sample
#> 80           S2     1  B01      Cin   FALSE         1          1      Sample
#> 81          S20     1  E03      Plu   FALSE       100          1      Sample
#> 82          S21     1  F03      Cry   FALSE         1          1      Sample
#> 83          S22     1  G03      Cry    TRUE       100          1      Sample
#> 84          S23     1  H03      Plu    TRUE       100          1      Sample
#> 85          S24     1  A04      Cin   FALSE       100          1      Sample
#> 86          S25     1  B04  Control   FALSE         0          1      Sample
#> 87          S26     1  C04  Control   FALSE         0          1      Sample
#> 88          S27     1  D04      Cin   FALSE       100          1      Sample
#> 89          S28     1  E04      Cin   FALSE       100          1      Sample
#> 90          S29     1  F04      Cin   FALSE       100          1      Sample
#> 91           S3     1  C01      Plu    TRUE         1          1      Sample
#> 92          S31     1  H04      Plu   FALSE       100          1      Sample
#> 93          S32     1  A05      Cry   FALSE       100          1      Sample
#> 94          S33     1  B05  Control   FALSE         0          1      Sample
#> 95          S34     1  C05      Plu   FALSE       100          1      Sample
#> 96          S35     1  E05  Control    TRUE         0          1      Sample
#> 97          S36     1  F05      Plu    TRUE       100          1      Sample
#> 98          S37     1  G05      Cry    TRUE       100          1      Sample
#> 99          S38     1  H05      Cry    TRUE         1          1      Sample
#> 100         S39     1  A06      Cin    TRUE         1          1      Sample
#> 101          S4     1  D01      Plu    TRUE         1          1      Sample
#> 102         S40     1  B06      Cry   FALSE         1          1      Sample
#> 103         S41     1  C06      Cry   FALSE       100          1      Sample
#> 104         S42     1  D06      Plu    TRUE         1          1      Sample
#> 105         S43     1  E06      Cry   FALSE         1          1      Sample
#> 106         S45     1  G06      Cin    TRUE       100          1      Sample
#> 107         S46     1  H06      Cin    TRUE         1          1      Sample
#> 108         S47     1  A07      Plu    TRUE       100          1      Sample
#> 109         S48     1  B07      Cin    TRUE       100          1      Sample
#> 110         S49     1  C07      Cin    TRUE         1          1      Sample
#> 111          S5     1  E01      Cin   FALSE         1          1      Sample
#> 112         S50     1  D07  Control    TRUE         0          1      Sample
#> 113         S51     1  E07      Cry   FALSE         1          1      Sample
#> 114         S52     1  F07      Plu    TRUE         1          1      Sample
#> 115         S53     1  G07      Plu   FALSE       100          1      Sample
#> 116         S54     1  A08      Plu   FALSE       100          1      Sample
#> 117         S55     1  B08      Plu   FALSE       100          1      Sample
#> 118         S56     1  C08      Cry    TRUE       100          1      Sample
#> 119         S57     1  D08      Cry   FALSE         1          1      Sample
#> 120         S58     1  E08      Cin   FALSE         1          1      Sample
#> 121         S59     1  F08      Cry   FALSE         1          1      Sample
#> 122          S6     1  F01      Cry    TRUE         1          1      Sample
#> 123         S60     1  G08      Cin   FALSE         1          1      Sample
#> 124         S61     1  H08      Cin   FALSE         1          1      Sample
#> 125         S62     1  A09      Cry    TRUE         1          1      Sample
#> 126         S63     1  B09      Cin    TRUE         1          1      Sample
#> 127         S64     1  C09  Control   FALSE         0          1      Sample
#> 128         S65     1  D09      Plu    TRUE       100          1      Sample
#> 129         S66     1  E09      Cin    TRUE         1          1      Sample
#> 130         S68     1  G09      Plu   FALSE         1          1      Sample
#> 131         S69     1  H09      Plu   FALSE         1          1      Sample
#> 132          S7     1  G01      Cry    TRUE         1          1      Sample
#> 133         S70     1  A10      Plu   FALSE         1          1      Sample
#> 134         S72     1  D10      Plu   FALSE         1          1      Sample
#> 135         S73     1  E10  Control   FALSE         0          1      Sample
#> 136         S74     1  F10  Control    TRUE         0          1      Sample
#> 137         S76     1  H10      Cin   FALSE         1          1      Sample
#> 138         S77     1  A11      Plu   FALSE       100          1      Sample
#> 139         S78     1  B11  Control   FALSE         0          1      Sample
#> 140         S79     1  C11  Control    TRUE         0          1      Sample
#> 141          S8     1  H01      Cin    TRUE       100          1      Sample
#> 142         S80     1  D11      Cry    TRUE         1          1      Sample
#> 143         S81     1  E11      Cin   FALSE       100          1      Sample
#> 144         S82     1  F11      Cry    TRUE       100          1      Sample
#> 145         S83     1  G11      Cry   FALSE       100          1      Sample
#> 146         S84     1  H11      Cin    TRUE       100          1      Sample
#> 147         S85     1  A12      Plu   FALSE         1          2      Sample
#> 148         S86     1  B12      Plu    TRUE         1          2      Sample
#> 149         S87     1  C12      Cry   FALSE       100          2      Sample
#> 150         S88     1  D12      Cin    TRUE       100          2      Sample
#> 151         S89     1  E12      Cry    TRUE       100          2      Sample
#> 152          S9     1  A02      Cry   FALSE       100          1      Sample
#> 153         S90     1  F12      Cry    TRUE       100          2      Sample
#> 154         S91     2  A01  Control   FALSE         0          2      Sample
#> 155         S92     2  B01      Cry   FALSE         1          2      Sample
#> 156         S93     2  C01      Cin   FALSE       100          2      Sample
#> 157         S94     2  D01  Control   FALSE         0          2      Sample
#> 158         S95     2  E01  Control    TRUE         0          2      Sample
#> 159         S96     2  F01      Cin    TRUE         1          2      Sample
#> 160         S97     2  G01      Cin    TRUE       100          2      Sample
#> 161         S98     2  H01      Plu    TRUE       100          2      Sample
#> 162         S99     2  A02      Cin   FALSE         1          2      Sample
#>     is_ambiguous     cin_prop     cry_prop    plu_prop expected_innoc_prop
#> 1          FALSE 0.000000e+00 0.0032119351 0.000000000        0.0032119351
#> 2          FALSE 0.000000e+00 0.0000000000 0.000000000        0.0000000000
#> 3          FALSE 0.000000e+00 0.0000000000 0.000000000        0.0000000000
#> 4          FALSE 0.000000e+00 0.0681409604 0.009248630        0.0092486300
#> 5          FALSE 2.104353e-04 0.0000000000 0.000000000        0.0000000000
#> 6          FALSE 2.356816e-03 0.0000000000 0.000000000        0.0023568163
#> 7          FALSE 0.000000e+00 0.0000000000 0.000000000        0.0000000000
#> 8          FALSE 0.000000e+00 0.0137158699 0.000000000        0.0137158699
#> 9          FALSE 0.000000e+00 0.0000000000 0.012172750        0.0121727499
#> 10         FALSE 8.617377e-01 0.0000000000 0.000000000        0.8617376934
#> 11         FALSE 1.903449e-03 0.0000000000 0.000000000                  NA
#> 12         FALSE 8.884608e-02 0.0000000000 0.000000000        0.0888460788
#> 13         FALSE 0.000000e+00 0.0000000000 0.000000000        0.0000000000
#> 14         FALSE 1.485791e-04 0.0022756064 0.000000000        0.0022756064
#> 15         FALSE 6.575360e-01 0.0402978596 0.000000000        0.6575360117
#> 16         FALSE 5.599046e-04 0.0010161232 0.000000000                  NA
#> 17         FALSE 2.965214e-04 0.0000000000 0.000000000        0.0000000000
#> 18         FALSE 0.000000e+00 0.0000000000 0.000000000        0.0000000000
#> 19         FALSE 0.000000e+00 0.0000000000 0.000000000                  NA
#> 20         FALSE 2.239594e-04 0.0000000000 0.016434353        0.0164343532
#> 21         FALSE 0.000000e+00 0.0006071162 0.000000000        0.0006071162
#> 22         FALSE 0.000000e+00 0.0000000000 0.000000000        0.0000000000
#> 23         FALSE 0.000000e+00 0.0000000000 0.000000000                  NA
#> 24         FALSE 0.000000e+00 0.0341041618 0.000000000        0.0000000000
#> 25         FALSE 0.000000e+00 0.0000000000 0.000000000        0.0000000000
#> 26         FALSE 8.058991e-01 0.0000000000 0.000000000        0.8058990655
#> 27         FALSE 8.216578e-04 0.0000000000 0.000000000        0.0000000000
#> 28         FALSE 7.052493e-04 0.0000000000 0.000000000        0.0000000000
#> 29         FALSE 8.237596e-04 0.1323752225 0.000000000        0.1323752225
#> 30         FALSE 0.000000e+00 0.0000000000 0.000000000        0.0000000000
#> 31         FALSE 0.000000e+00 0.0096353017 0.000000000        0.0000000000
#> 32         FALSE 4.516005e-03 0.0000000000 0.000000000        0.0000000000
#> 33         FALSE 0.000000e+00 0.0000000000 0.000000000        0.0000000000
#> 34         FALSE 1.260323e-04 0.0022420484 0.000000000                  NA
#> 35         FALSE 0.000000e+00 0.0000000000 0.000000000                  NA
#> 36         FALSE 8.338906e-01 0.0000000000 0.000000000        0.8338905565
#> 37         FALSE 3.360055e-01 0.0000000000 0.000000000        0.3360055474
#> 38         FALSE 1.061727e-03 0.0014822134 0.000000000        0.0014822134
#> 39         FALSE 5.096104e-01 0.0038817338 0.000000000        0.5096103556
#> 40         FALSE 8.038871e-01 0.0598278485 0.000000000        0.8038871094
#> 41         FALSE 5.952775e-04 0.0000000000 0.000000000        0.0000000000
#> 42         FALSE 0.000000e+00 0.0000000000 0.000000000        0.0000000000
#> 43         FALSE 3.314016e-04 0.0213619712 0.000000000        0.0213619712
#> 44         FALSE 2.953856e-04 0.0000000000 0.009667891        0.0096678908
#> 45         FALSE 6.821178e-01 0.0000000000 0.000000000        0.6821178209
#> 46         FALSE 0.000000e+00 0.0000000000 0.000000000        0.0000000000
#> 47         FALSE 0.000000e+00 0.0000000000 0.000000000        0.0000000000
#> 48         FALSE 0.000000e+00 0.0190983882 0.000000000        0.0190983882
#> 49         FALSE 3.565927e-04 0.0000000000 0.000000000                  NA
#> 50         FALSE 0.000000e+00 0.0000000000 0.000000000        0.0000000000
#> 51         FALSE 1.526441e-04 0.0000000000 0.000000000                  NA
#> 52         FALSE 9.834957e-04 0.0000000000 0.000000000        0.0000000000
#> 53         FALSE 8.702460e-01 0.0000000000 0.000000000        0.8702459781
#> 54         FALSE 1.032316e-03 0.0039198085 0.000000000        0.0039198085
#> 55         FALSE 4.233806e-04 0.0000000000 0.000000000        0.0000000000
#> 56         FALSE 3.401466e-04 0.0000000000 0.000000000        0.0000000000
#> 57         FALSE 6.002560e-01 0.0000000000 0.000000000        0.6002559521
#> 58         FALSE 0.000000e+00 0.0096928783 0.000000000                  NA
#> 59         FALSE 0.000000e+00 0.0000000000 0.000000000        0.0000000000
#> 60         FALSE 9.091426e-05 0.1694300813 0.000000000        0.1694300813
#> 61         FALSE 2.342743e-03 0.0000000000 0.000000000        0.0000000000
#> 62         FALSE 7.245918e-01 0.0000000000 0.000000000        0.7245917650
#> 63         FALSE 3.705783e-01 0.0000000000 0.000000000        0.3705782938
#> 64         FALSE 4.948334e-04 0.0000000000 0.000000000        0.0000000000
#> 65         FALSE 5.876028e-04 0.0000000000 0.000000000        0.0000000000
#> 66         FALSE 0.000000e+00 0.0000000000 0.000000000        0.0000000000
#> 67         FALSE 0.000000e+00 0.5248189051 0.000000000        0.5248189051
#> 68         FALSE 0.000000e+00 0.0000000000 0.000000000                  NA
#> 69         FALSE 0.000000e+00 0.0000000000 0.000000000        0.0000000000
#> 70         FALSE 2.646929e-01 0.0000000000 0.000000000        0.2646929245
#> 71         FALSE 4.594005e-04 0.0387475594 0.001981165        0.0019811646
#> 72         FALSE 9.639821e-04 0.0000000000 0.000000000        0.0000000000
#> 73         FALSE 0.000000e+00 0.0999563628 0.000000000        0.0000000000
#> 74         FALSE 0.000000e+00 0.0000000000 0.002561376        0.0025613757
#> 75         FALSE 0.000000e+00 0.0000000000 0.000000000        0.0000000000
#> 76         FALSE 5.927190e-04 0.0000000000 0.000000000        0.0005927190
#> 77         FALSE 7.771796e-01 0.0000000000 0.000000000        0.7771795912
#> 78         FALSE 0.000000e+00 0.1070520397 0.000000000        0.1070520397
#> 79         FALSE 0.000000e+00 0.1214449744 0.000000000                  NA
#> 80         FALSE 0.000000e+00 0.2140226089 0.000000000        0.0000000000
#> 81         FALSE 0.000000e+00 0.0000000000 0.025472540        0.0254725400
#> 82         FALSE 0.000000e+00 0.0000000000 0.000000000        0.0000000000
#> 83         FALSE 0.000000e+00 0.1758458754 0.000000000        0.1758458754
#> 84         FALSE 0.000000e+00 0.0000000000 0.001552998        0.0015529984
#> 85         FALSE 3.306350e-01 0.0000000000 0.000000000        0.3306349557
#> 86         FALSE 3.615166e-04 0.0000000000 0.000000000                  NA
#> 87         FALSE 0.000000e+00 0.0000000000 0.000000000                  NA
#> 88         FALSE 2.491744e-01 0.0000000000 0.000000000        0.2491743614
#> 89         FALSE 1.839319e-01 0.0000000000 0.000000000        0.1839319005
#> 90         FALSE 5.419236e-01 0.0000000000 0.000000000        0.5419235512
#> 91         FALSE 0.000000e+00 0.0000000000 0.000000000        0.0000000000
#> 92         FALSE 2.819681e-04 0.0000000000 0.000000000        0.0000000000
#> 93         FALSE 0.000000e+00 0.0703178991 0.000000000        0.0703178991
#> 94         FALSE 0.000000e+00 0.0000000000 0.000000000                  NA
#> 95         FALSE 0.000000e+00 0.0000000000 0.000000000        0.0000000000
#> 96         FALSE 0.000000e+00 0.0000000000 0.000000000                  NA
#> 97         FALSE 0.000000e+00 0.0183459671 0.000000000        0.0000000000
#> 98         FALSE 0.000000e+00 0.0128762783 0.000000000        0.0128762783
#> 99         FALSE 0.000000e+00 0.0000000000 0.000000000        0.0000000000
#> 100        FALSE 0.000000e+00 0.0000000000 0.000000000        0.0000000000
#> 101        FALSE 4.498749e-04 0.0122006082 0.000000000        0.0000000000
#> 102        FALSE 0.000000e+00 0.0000000000 0.000000000        0.0000000000
#> 103        FALSE 0.000000e+00 0.1319423968 0.000000000        0.1319423968
#> 104        FALSE 0.000000e+00 0.0000000000 0.000000000        0.0000000000
#> 105        FALSE 0.000000e+00 0.1122069523 0.000000000        0.1122069523
#> 106        FALSE 8.016574e-01 0.0000000000 0.000000000        0.8016574239
#> 107        FALSE 2.072690e-04 0.0256647850 0.000000000        0.0002072690
#> 108        FALSE 0.000000e+00 0.0000000000 0.005374538        0.0053745381
#> 109        FALSE 7.775173e-01 0.0379334258 0.000000000        0.7775173370
#> 110        FALSE 0.000000e+00 0.5405549029 0.000000000        0.0000000000
#> 111        FALSE 0.000000e+00 0.0000000000 0.000000000        0.0000000000
#> 112        FALSE 1.805293e-04 0.0000000000 0.000000000                  NA
#> 113        FALSE 0.000000e+00 0.0000000000 0.000000000        0.0000000000
#> 114        FALSE 0.000000e+00 0.0000000000 0.000000000        0.0000000000
#> 115        FALSE 0.000000e+00 0.0000000000 0.000000000        0.0000000000
#> 116        FALSE 0.000000e+00 0.0000000000 0.000000000        0.0000000000
#> 117        FALSE 0.000000e+00 0.0000000000 0.002114817        0.0021148169
#> 118        FALSE 0.000000e+00 0.0084429179 0.000000000        0.0084429179
#> 119        FALSE 0.000000e+00 0.0000000000 0.000000000        0.0000000000
#> 120        FALSE 0.000000e+00 0.0000000000 0.000000000        0.0000000000
#> 121        FALSE 0.000000e+00 0.0000000000 0.000000000        0.0000000000
#> 122        FALSE 2.324680e-04 0.0000000000 0.000000000        0.0000000000
#> 123        FALSE 0.000000e+00 0.0000000000 0.000000000        0.0000000000
#> 124        FALSE 0.000000e+00 0.0000000000 0.000000000        0.0000000000
#> 125        FALSE 0.000000e+00 0.0000000000 0.000000000        0.0000000000
#> 126        FALSE 0.000000e+00 0.0000000000 0.000000000        0.0000000000
#> 127        FALSE 0.000000e+00 0.0000000000 0.000000000                  NA
#> 128        FALSE 2.753304e-04 0.0053383505 0.001514317        0.0015143172
#> 129        FALSE 3.134169e-02 0.0017392333 0.000000000        0.0313416943
#> 130        FALSE 0.000000e+00 0.0000000000 0.000000000        0.0000000000
#> 131        FALSE 5.528025e-04 0.0000000000 0.000000000        0.0000000000
#> 132        FALSE 0.000000e+00 0.0000000000 0.000000000        0.0000000000
#> 133        FALSE 0.000000e+00 0.0000000000 0.000000000        0.0000000000
#> 134        FALSE 0.000000e+00 0.0000000000 0.000000000        0.0000000000
#> 135        FALSE 4.605775e-03 0.0000000000 0.000000000                  NA
#> 136        FALSE 0.000000e+00 0.1818410663 0.000000000                  NA
#> 137        FALSE 4.535007e-03 0.0753929304 0.000000000        0.0045350065
#> 138        FALSE 0.000000e+00 0.0000000000 0.029522111        0.0295221108
#> 139        FALSE 1.419265e-04 0.0000000000 0.000000000                  NA
#> 140        FALSE 0.000000e+00 0.0008006049 0.000000000                  NA
#> 141        FALSE 8.299622e-01 0.0000000000 0.000000000        0.8299622226
#> 142        FALSE 1.934727e-04 0.0000000000 0.000000000        0.0000000000
#> 143        FALSE 4.375981e-01 0.0000000000 0.000000000        0.4375981106
#> 144        FALSE 0.000000e+00 0.0051668883 0.000000000        0.0051668883
#> 145        FALSE 0.000000e+00 0.0000000000 0.000000000        0.0000000000
#> 146        FALSE 6.952040e-01 0.0000000000 0.000000000        0.6952040387
#> 147        FALSE 0.000000e+00 0.0045444976 0.000000000        0.0000000000
#> 148        FALSE 1.398813e-04 0.1220697775 0.000000000        0.0000000000
#> 149        FALSE 0.000000e+00 0.0015766494 0.000000000        0.0015766494
#> 150        FALSE 8.364097e-01 0.0000000000 0.000000000        0.8364097093
#> 151        FALSE 0.000000e+00 0.0062249850 0.000000000        0.0062249850
#> 152        FALSE 0.000000e+00 0.0000000000 0.000000000        0.0000000000
#> 153        FALSE 3.433582e-04 0.0202405268 0.000000000        0.0202405268
#> 154        FALSE 3.666286e-04 0.0000000000 0.000000000                  NA
#> 155        FALSE 6.114546e-04 0.0000000000 0.000000000        0.0000000000
#> 156        FALSE 8.525916e-01 0.0000000000 0.000000000        0.8525915810
#> 157        FALSE 1.288285e-03 0.0000000000 0.000000000                  NA
#> 158        FALSE 0.000000e+00 0.0000000000 0.000000000                  NA
#> 159        FALSE 7.127968e-02 0.0000000000 0.000000000        0.0712796768
#> 160        FALSE 8.482561e-01 0.0000000000 0.000000000        0.8482560850
#> 161        FALSE 0.000000e+00 0.0000000000 0.330256227        0.3302562274
#> 162        FALSE 0.000000e+00 0.0000000000 0.000000000        0.0000000000
#>     unexpected_innoc_prop expected_innoc only_expected_innoc valid_inoc
#> 1            0.000000e+00           TRUE                TRUE       TRUE
#> 2            0.000000e+00          FALSE               FALSE      FALSE
#> 3            0.000000e+00          FALSE               FALSE      FALSE
#> 4            6.814096e-02           TRUE               FALSE      FALSE
#> 5            2.104353e-04          FALSE               FALSE      FALSE
#> 6            0.000000e+00           TRUE                TRUE       TRUE
#> 7            0.000000e+00          FALSE               FALSE      FALSE
#> 8            0.000000e+00           TRUE                TRUE       TRUE
#> 9            0.000000e+00           TRUE                TRUE       TRUE
#> 10           0.000000e+00           TRUE                TRUE       TRUE
#> 11           1.903449e-03          FALSE               FALSE      FALSE
#> 12           0.000000e+00           TRUE                TRUE       TRUE
#> 13           0.000000e+00          FALSE               FALSE      FALSE
#> 14           1.485791e-04           TRUE               FALSE      FALSE
#> 15           4.029786e-02           TRUE               FALSE      FALSE
#> 16           1.576028e-03          FALSE               FALSE      FALSE
#> 17           2.965214e-04          FALSE               FALSE      FALSE
#> 18           0.000000e+00          FALSE               FALSE      FALSE
#> 19           0.000000e+00           TRUE                TRUE       TRUE
#> 20           2.239594e-04           TRUE               FALSE      FALSE
#> 21           0.000000e+00           TRUE                TRUE       TRUE
#> 22           0.000000e+00          FALSE               FALSE      FALSE
#> 23           0.000000e+00           TRUE                TRUE       TRUE
#> 24           3.410416e-02          FALSE               FALSE      FALSE
#> 25           0.000000e+00          FALSE               FALSE      FALSE
#> 26           0.000000e+00           TRUE                TRUE       TRUE
#> 27           8.216578e-04          FALSE               FALSE      FALSE
#> 28           7.052493e-04          FALSE               FALSE      FALSE
#> 29           8.237596e-04           TRUE               FALSE      FALSE
#> 30           0.000000e+00          FALSE               FALSE      FALSE
#> 31           9.635302e-03          FALSE               FALSE      FALSE
#> 32           4.516005e-03          FALSE               FALSE      FALSE
#> 33           0.000000e+00          FALSE               FALSE      FALSE
#> 34           2.368081e-03          FALSE               FALSE      FALSE
#> 35           0.000000e+00           TRUE                TRUE       TRUE
#> 36           0.000000e+00           TRUE                TRUE       TRUE
#> 37           0.000000e+00           TRUE                TRUE       TRUE
#> 38           1.061727e-03           TRUE               FALSE      FALSE
#> 39           3.881734e-03           TRUE               FALSE      FALSE
#> 40           5.982785e-02           TRUE               FALSE      FALSE
#> 41           5.952775e-04          FALSE               FALSE      FALSE
#> 42           0.000000e+00          FALSE               FALSE      FALSE
#> 43           3.314016e-04           TRUE               FALSE      FALSE
#> 44           2.953856e-04           TRUE               FALSE      FALSE
#> 45           0.000000e+00           TRUE                TRUE       TRUE
#> 46           0.000000e+00          FALSE               FALSE      FALSE
#> 47           0.000000e+00          FALSE               FALSE      FALSE
#> 48           0.000000e+00           TRUE                TRUE       TRUE
#> 49           3.565927e-04          FALSE               FALSE      FALSE
#> 50           0.000000e+00          FALSE               FALSE      FALSE
#> 51           1.526441e-04          FALSE               FALSE      FALSE
#> 52           9.834957e-04          FALSE               FALSE      FALSE
#> 53           0.000000e+00           TRUE                TRUE       TRUE
#> 54           1.032316e-03           TRUE               FALSE      FALSE
#> 55           4.233806e-04          FALSE               FALSE      FALSE
#> 56           3.401466e-04          FALSE               FALSE      FALSE
#> 57           0.000000e+00           TRUE                TRUE       TRUE
#> 58           9.692878e-03          FALSE               FALSE      FALSE
#> 59           0.000000e+00          FALSE               FALSE      FALSE
#> 60           9.091426e-05           TRUE               FALSE       TRUE
#> 61           2.342743e-03          FALSE               FALSE      FALSE
#> 62           0.000000e+00           TRUE                TRUE       TRUE
#> 63           0.000000e+00           TRUE                TRUE       TRUE
#> 64           4.948334e-04          FALSE               FALSE      FALSE
#> 65           5.876028e-04          FALSE               FALSE      FALSE
#> 66           0.000000e+00          FALSE               FALSE      FALSE
#> 67           0.000000e+00           TRUE                TRUE       TRUE
#> 68           0.000000e+00           TRUE                TRUE       TRUE
#> 69           0.000000e+00          FALSE               FALSE      FALSE
#> 70           0.000000e+00           TRUE                TRUE       TRUE
#> 71           3.920696e-02           TRUE               FALSE      FALSE
#> 72           9.639821e-04          FALSE               FALSE      FALSE
#> 73           9.995636e-02          FALSE               FALSE      FALSE
#> 74           0.000000e+00           TRUE                TRUE       TRUE
#> 75           0.000000e+00          FALSE               FALSE      FALSE
#> 76           0.000000e+00           TRUE                TRUE       TRUE
#> 77           0.000000e+00           TRUE                TRUE       TRUE
#> 78           0.000000e+00           TRUE                TRUE       TRUE
#> 79           1.214450e-01          FALSE               FALSE      FALSE
#> 80           2.140226e-01          FALSE               FALSE      FALSE
#> 81           0.000000e+00           TRUE                TRUE       TRUE
#> 82           0.000000e+00          FALSE               FALSE      FALSE
#> 83           0.000000e+00           TRUE                TRUE       TRUE
#> 84           0.000000e+00           TRUE                TRUE       TRUE
#> 85           0.000000e+00           TRUE                TRUE       TRUE
#> 86           3.615166e-04          FALSE               FALSE      FALSE
#> 87           0.000000e+00           TRUE                TRUE       TRUE
#> 88           0.000000e+00           TRUE                TRUE       TRUE
#> 89           0.000000e+00           TRUE                TRUE       TRUE
#> 90           0.000000e+00           TRUE                TRUE       TRUE
#> 91           0.000000e+00          FALSE               FALSE      FALSE
#> 92           2.819681e-04          FALSE               FALSE      FALSE
#> 93           0.000000e+00           TRUE                TRUE       TRUE
#> 94           0.000000e+00           TRUE                TRUE       TRUE
#> 95           0.000000e+00          FALSE               FALSE      FALSE
#> 96           0.000000e+00           TRUE                TRUE       TRUE
#> 97           1.834597e-02          FALSE               FALSE      FALSE
#> 98           0.000000e+00           TRUE                TRUE       TRUE
#> 99           0.000000e+00          FALSE               FALSE      FALSE
#> 100          0.000000e+00          FALSE               FALSE      FALSE
#> 101          1.265048e-02          FALSE               FALSE      FALSE
#> 102          0.000000e+00          FALSE               FALSE      FALSE
#> 103          0.000000e+00           TRUE                TRUE       TRUE
#> 104          0.000000e+00          FALSE               FALSE      FALSE
#> 105          0.000000e+00           TRUE                TRUE       TRUE
#> 106          0.000000e+00           TRUE                TRUE       TRUE
#> 107          2.566478e-02           TRUE               FALSE      FALSE
#> 108          0.000000e+00           TRUE                TRUE       TRUE
#> 109          3.793343e-02           TRUE               FALSE      FALSE
#> 110          5.405549e-01          FALSE               FALSE      FALSE
#> 111          0.000000e+00          FALSE               FALSE      FALSE
#> 112          1.805293e-04          FALSE               FALSE      FALSE
#> 113          0.000000e+00          FALSE               FALSE      FALSE
#> 114          0.000000e+00          FALSE               FALSE      FALSE
#> 115          0.000000e+00          FALSE               FALSE      FALSE
#> 116          0.000000e+00          FALSE               FALSE      FALSE
#> 117          0.000000e+00           TRUE                TRUE       TRUE
#> 118          0.000000e+00           TRUE                TRUE       TRUE
#> 119          0.000000e+00          FALSE               FALSE      FALSE
#> 120          0.000000e+00          FALSE               FALSE      FALSE
#> 121          0.000000e+00          FALSE               FALSE      FALSE
#> 122          2.324680e-04          FALSE               FALSE      FALSE
#> 123          0.000000e+00          FALSE               FALSE      FALSE
#> 124          0.000000e+00          FALSE               FALSE      FALSE
#> 125          0.000000e+00          FALSE               FALSE      FALSE
#> 126          0.000000e+00          FALSE               FALSE      FALSE
#> 127          0.000000e+00           TRUE                TRUE       TRUE
#> 128          5.613681e-03           TRUE               FALSE      FALSE
#> 129          1.739233e-03           TRUE               FALSE      FALSE
#> 130          0.000000e+00          FALSE               FALSE      FALSE
#> 131          5.528025e-04          FALSE               FALSE      FALSE
#> 132          0.000000e+00          FALSE               FALSE      FALSE
#> 133          0.000000e+00          FALSE               FALSE      FALSE
#> 134          0.000000e+00          FALSE               FALSE      FALSE
#> 135          4.605775e-03          FALSE               FALSE      FALSE
#> 136          1.818411e-01          FALSE               FALSE      FALSE
#> 137          7.539293e-02           TRUE               FALSE      FALSE
#> 138          0.000000e+00           TRUE                TRUE       TRUE
#> 139          1.419265e-04          FALSE               FALSE      FALSE
#> 140          8.006049e-04          FALSE               FALSE      FALSE
#> 141          0.000000e+00           TRUE                TRUE       TRUE
#> 142          1.934727e-04          FALSE               FALSE      FALSE
#> 143          0.000000e+00           TRUE                TRUE       TRUE
#> 144          0.000000e+00           TRUE                TRUE       TRUE
#> 145          0.000000e+00          FALSE               FALSE      FALSE
#> 146          0.000000e+00           TRUE                TRUE       TRUE
#> 147          4.544498e-03          FALSE               FALSE      FALSE
#> 148          1.222097e-01          FALSE               FALSE      FALSE
#> 149          0.000000e+00           TRUE                TRUE       TRUE
#> 150          0.000000e+00           TRUE                TRUE       TRUE
#> 151          0.000000e+00           TRUE                TRUE       TRUE
#> 152          0.000000e+00          FALSE               FALSE      FALSE
#> 153          3.433582e-04           TRUE               FALSE      FALSE
#> 154          3.666286e-04          FALSE               FALSE      FALSE
#> 155          6.114546e-04          FALSE               FALSE      FALSE
#> 156          0.000000e+00           TRUE                TRUE       TRUE
#> 157          1.288285e-03          FALSE               FALSE      FALSE
#> 158          0.000000e+00           TRUE                TRUE       TRUE
#> 159          0.000000e+00           TRUE                TRUE       TRUE
#> 160          0.000000e+00           TRUE                TRUE       TRUE
#> 161          0.000000e+00           TRUE                TRUE       TRUE
#> 162          0.000000e+00          FALSE               FALSE      FALSE
#>     raw_count richness  shannon invsimpson
#> 1       56792       88 2.153433   5.642725
#> 2       35094       89 2.010482   3.255848
#> 3       50052       92 2.313394   5.991114
#> 4       42596      116 2.556608   6.930950
#> 5       85519      106 2.345237   5.789557
#> 6       70268      161 2.341437   4.051853
#> 7       52090       96 1.452460   1.862109
#> 8       54075       92 2.573755   8.445344
#> 9       51693      103 1.946426   3.574197
#> 10      25517       99 2.762789   9.372035
#> 11      66594      109 2.304615   4.861717
#> 12     107364      115 2.085041   5.069044
#> 13      29103       83 2.610991   8.052769
#> 14     127568      153 2.651190   6.766775
#> 15      44555      131 3.098712  10.769495
#> 16      96293       90 1.768492   3.323680
#> 17      53943      101 1.752296   3.197930
#> 18      61275      129 2.301575   3.658643
#> 19      55156       77 2.398226   7.921531
#> 20      92205       61 1.514355   3.213147
#> 21     100414       68 1.558421   3.139064
#> 22      44403       59 1.522436   2.431314
#> 23      60550       65 1.598898   2.753033
#> 24      57975       65 1.541798   2.251030
#> 25      26000       81 2.198707   4.348543
#> 26      23638       74 2.575201   7.418115
#> 27      70531      141 2.558644   5.182404
#> 28     114772      117 1.390062   1.887378
#> 29     117852      109 1.647392   3.247214
#> 30     103530      126 2.518675   6.154125
#> 31     159214      114 1.862766   3.776559
#> 32      72523       93 1.832609   3.403918
#> 33      49524       63 2.113848   5.106784
#> 34     150398      127 1.962830   3.250105
#> 35      46829      126 2.466678   6.305792
#> 36      25289      109 2.984704  10.500101
#> 37      78520      134 2.728416   7.571191
#> 38      94886      135 2.458813   7.154550
#> 39      59533      135 2.558063   6.442152
#> 40      14424       73 2.373343   4.518265
#> 41      75550      114 2.416444   6.242318
#> 42      21536       29 1.514916   3.317566
#> 43     109225       69 1.602980   2.424974
#> 44     124012      130 1.639556   2.310273
#> 45      65389      147 2.500037   5.458671
#> 46      68190       71 1.572326   2.988710
#> 47      94920      131 2.575501   6.049414
#> 48      93476      101 1.854711   3.684141
#> 49      70083      150 2.318448   3.979918
#> 50      46976       86 2.095955   3.973083
#> 51     104803      139 2.557721   6.215693
#> 52      65010      137 2.358371   3.904012
#> 53      21712      145 3.466030  16.541237
#> 54      66509      129 2.463312   6.637095
#> 55      80272      175 3.053075  10.396626
#> 56      96984      206 2.777841   5.440807
#> 57      16555       52 1.796011   3.020966
#> 58     118822      107 1.707368   2.691869
#> 59      71485       72 1.899511   4.607572
#> 60     146156      123 1.135382   1.643594
#> 61     139253      147 2.509818   7.873239
#> 62      46246      114 2.821021   9.650663
#> 63      85658       92 1.732665   2.665200
#> 64     103014      180 2.583387   6.203608
#> 65      79939      137 1.986289   3.092920
#> 66      85884      132 2.026281   3.007894
#> 67      24009       86 2.688169   8.560356
#> 68      65809      120 2.655521   8.448870
#> 69      71949      133 2.385226   4.220238
#> 70      58415      120 2.488638   6.101319
#> 71      66787      111 2.421314   6.946012
#> 72      68400      100 1.728315   2.790616
#> 73      66002      134 2.668614   8.146460
#> 74      66590       92 2.310277   5.247890
#> 75      46289       72 1.762806   3.174792
#> 76      84307      143 2.695193   7.972655
#> 77      54384      159 2.756660   7.990911
#> 78      49864      112 2.219509   4.039626
#> 79      69629      118 2.359181   4.557221
#> 80      37128      105 2.759988   8.795219
#> 81      36039       99 2.050288   3.608854
#> 82      41086      142 2.956628   9.222725
#> 83      65109       88 2.150016   5.182877
#> 84      53362       58 1.744124   2.965370
#> 85      33323      139 2.774747   4.966373
#> 86      44242      106 1.819239   2.518617
#> 87      86935      102 1.845723   2.864478
#> 88      77980      110 2.528470   5.216023
#> 89      69600      142 3.242399  12.658888
#> 90      40865      129 3.024593   9.622248
#> 91      65271       72 1.485296   2.186816
#> 92      56728       99 2.363015   6.651952
#> 93      43048      125 2.312106   4.722848
#> 94      62948      111 1.881901   3.514578
#> 95      68220      143 2.762740   6.676584
#> 96      71903       93 1.191413   1.633253
#> 97      57628      134 2.506380   5.689434
#> 98      68536       94 1.762455   3.185332
#> 99      59064      122 2.244709   3.228133
#> 100     75151       76 1.237181   1.926689
#> 101     54868      109 2.049756   3.112191
#> 102     64214      113 2.402765   5.524994
#> 103     73600      138 2.328157   5.399680
#> 104     87706       91 1.348103   1.939790
#> 105     54910      149 2.994047   9.322615
#> 106     12661       62 1.535866   2.125492
#> 107     79897      140 2.708105   7.665384
#> 108     74025       84 1.132858   1.643401
#> 109     13306       64 2.694278   8.726761
#> 110     49281      117 2.895546   8.795751
#> 111     59189      115 1.897969   3.300783
#> 112     83074       98 1.730608   3.160816
#> 113     62143       94 1.883249   3.704288
#> 114    100948       88 2.303101   6.323346
#> 115     61823      151 2.886861  10.443703
#> 116     55780      106 2.076854   3.806199
#> 117     67947       95 1.513725   1.979928
#> 118     72462       83 1.092397   1.715881
#> 119     62568       88 1.736633   2.607348
#> 120     65700      112 2.300691   4.367626
#> 121     66024      117 1.850855   3.111033
#> 122     64510      118 1.954688   3.415014
#> 123     35562       74 2.322564   4.569763
#> 124     19815       81 2.625129   7.434818
#> 125     93912       99 1.388232   1.929344
#> 126     95057       98 1.183106   1.723783
#> 127     62182       97 1.545273   1.964611
#> 128     64910       65 1.479088   2.999758
#> 129     81724      113 1.854795   3.191583
#> 130     68448      110 2.017750   3.822747
#> 131     50623      131 2.370888   3.502136
#> 132     76937      127 2.098633   3.913458
#> 133     76768      144 2.553445   5.242416
#> 134    119130      129 2.143971   4.757131
#> 135     47330       85 2.443231   7.341955
#> 136     61753       85 1.091612   1.619210
#> 137     74052      137 2.440327   5.019960
#> 138     78895      231 3.630423  16.289708
#> 139     70449      126 2.327186   6.167252
#> 140     89860      164 2.095345   2.832785
#> 141     17284       71 2.503204   4.558187
#> 142     82683      146 1.671330   2.164197
#> 143     39768       97 2.593612   6.485594
#> 144     91264      154 2.982277  10.525227
#> 145     66945      153 3.025423  10.841875
#> 146     13041       56 2.245492   4.869926
#> 147    137123      168 2.737981   6.583945
#> 148    150606      130 1.931330   3.145668
#> 149     98788      133 1.838470   2.605106
#> 150     24532       92 2.400227   5.721092
#> 151    114464      119 2.417572   7.005522
#> 152     31313       70 1.784414   2.933062
#> 153    111246      136 2.351109   5.675295
#> 154     73617      111 2.201648   4.072389
#> 155    102970       91 2.255857   6.443645
#> 156     23315       94 3.082426  12.354025
#> 157     72096      112 2.254604   4.195193
#> 158    139998      110 1.379623   2.043255
#> 159    100690       90 1.893362   3.779776
#> 160     14975       67 2.743060   8.426671
#> 161     48827       81 2.062135   3.716522
#> 162     38071      120 2.975275  10.626657

plotted_factors <- c('Organism' = 'organism', 'Flooded' = 'flooded', 'Pathogen Concentration' = 'path_conc', 'Trial' = 'experiment')

# Reformat data for plotting
alpha_plot_data <- plotted_factors %>%
  map2_dfr(names(plotted_factors), 
           function(factor, factor_name) {
             out <- metadata
             out$factor <- factor_name
             out$value <- as.character(metadata[[factor]])
             return(out)
           }) %>%
  mutate(path_conc = factor(path_conc, 
                            levels = sort(unique(path_conc)), 
                            labels = paste(sort(unique(path_conc)), 'CFU/g'), 
                            ordered = TRUE)) %>%
  filter(sample_type == 'Sample') %>%
  select(sample_name, factor, value, invsimpson) %>%
  tidyr::gather(key = "index", value = "diversity", -sample_name, -factor, -value) %>%
  mutate(value = forcats::fct_relevel(ordered(value), "Control", "Cin", "Plu", "Cry"))

# ANOVA and Tukey's HSD
anova_and_hsd <- function(x) {
  anova_result <- aov(diversity ~ value, x)
  tukey_result <- agricolae::HSD.test(anova_result, "value", group = TRUE)
  group_data <- tukey_result$groups[order(rownames(tukey_result$groups)),]
  group_key <- setNames(group_data$groups, rownames(group_data))
  group_key[as.character(x$value)]
}
alpha_plot_data$group <- unlist(map(split(alpha_plot_data, alpha_plot_data$factor)[unique(alpha_plot_data$factor)], anova_and_hsd))


alpha_subplot <- ggplot(alpha_plot_data, aes(x = value, y = diversity)) +
  geom_boxplot() +
  geom_text(aes(x = value,
                y = max(diversity) + 2,
                label = group),
            col = 'black',
            size = 5) +
  facet_grid( ~ factor, scales = "free") +
  labs(x = NULL, y = 'Diversity (Inverse Simpson)') +
  guides(color = "none") +
  theme(panel.grid.major.x = element_blank(),
        panel.grid.minor = element_blank(),
        axis.text.x = element_text(angle = 45, hjust = 1),
        legend.position="bottom")

Beta diversity

set.seed(1)
prob_table <- obj$data$prop[metadata$sample_name]
nmds_plot_data <- function(prob_table) {
  metadata <- metadata[metadata$sample_name %in% colnames(prob_table), ]
  set.seed(1)
  nmds_results <- vegan::metaMDS(t(prob_table), trymax = 1000, k = 2, trace = 0)
  nmds_data <- nmds_results$points %>%
    as_tibble() %>%
    bind_cols(metadata)
  names(nmds_data)[1:2] <- paste0("NMDS", 1:2)
  return(nmds_data)
}

nmds_data <- nmds_plot_data(prob_table[! is.na(metadata$organism) & metadata$valid_inoc])

nmds_factors <- c(Flooded = 'flooded', Organism = 'organism', 'Pathogen CFU/g' = 'path_conc', 'Trial' = 'experiment')

make_one_plot <- function(factor, name) {
  nmds_data %>%
    mutate(factor = as.character(nmds_data[[factor]]),
           NMDS1 = scales::rescale(NMDS1),
           NMDS2 = scales::rescale(NMDS2)) %>%
    mutate(factor = fct_relevel(ordered(factor), "Control", "Cin", "Plu", "Cry")) %>%
    ggplot(aes_string(x = "NMDS1", y = "NMDS2", color = "factor", label = "sample_name")) +
    # geom_label(size = 2) +
    geom_point() +
    coord_fixed() +
    viridis::scale_color_viridis(discrete = TRUE, end = .9) +
    labs(color = NULL, x = NULL, y = NULL) +
    theme(panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          axis.text = element_blank(),
          axis.title = element_text(size = 7), 
          axis.ticks = element_blank(),
          plot.margin = unit(rep(0.04, 4), "cm"),
          # panel.background = element_rect(fill = 'transparent', colour = NA),
          # plot.background = element_rect(fill = "white", colour = NA),
          legend.position = "bottom",
          legend.text = element_text(size = 10),
          legend.key.height = unit(0.5, 'cm'),
          legend.key.width = unit(0.2, 'cm'))
}

nmds_subplots <- map2(nmds_factors, names(nmds_factors), make_one_plot)
#> Warning: There was 1 warning in `mutate()`.
#> ℹ In argument: `factor = fct_relevel(ordered(factor), "Control", "Cin", "Plu",
#>   "Cry")`.
#> Caused by warning:
#> ! 4 unknown levels in `f`: Control, Cin, Plu, and Cry
#> Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
#> ℹ Please use tidy evaluation idioms with `aes()`.
#> ℹ See also `vignette("ggplot2-in-packages")` for more information.
#> This warning is displayed once every 8 hours.
#> Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
#> generated.
#> Warning: There was 1 warning in `mutate()`.
#> ℹ In argument: `factor = fct_relevel(ordered(factor), "Control", "Cin", "Plu",
#>   "Cry")`.
#> Caused by warning:
#> ! 4 unknown levels in `f`: Control, Cin, Plu, and Cry
#> There was 1 warning in `mutate()`.
#> ℹ In argument: `factor = fct_relevel(ordered(factor), "Control", "Cin", "Plu",
#>   "Cry")`.
#> Caused by warning:
#> ! 4 unknown levels in `f`: Control, Cin, Plu, and Cry
nmds_plot <- ggpubr::ggarrange(plotlist = c(list(ggplot() + theme_void()), nmds_subplots),
                       nrow = 1, widths = c(0.15, 1, 1, 1, 1))

ggsave(nmds_plot, path = "standard_workflow/combined_figures", filename = "nmds.pdf", 
       width = 7, height = 8)
print(nmds_plot)


combined_div_plot <- ggpubr::ggarrange(alpha_subplot, nmds_plot, ncol = 1, labels = c('A', 'B'),
                               heights = c(1, 1))
combined_div_plot


ggsave(combined_div_plot, path = "standard_workflow/combined_figures", filename = "diversity.pdf", 
       width = 10, height = 6, bg = "#FFFFFF")

ggsave(combined_div_plot, path = "standard_workflow/combined_figures", filename = "diversity.svg", 
       width = 10, height = 6, bg = "#FFFFFF")

Let’s do associated PERMANOVA analyses to better understand significance of differences in diversity between the 3 factors

dist_matrix <- vegdist(t(prob_table), method = "bray")

adonis2_full <- adonis2(dist_matrix ~ organism + flooded + path_conc + experiment, 
                         data = metadata, 
                         permutations = 999,
                         method = "bray")

print(adonis2_full)
#> Permutation test for adonis under reduced model
#> Terms added sequentially (first to last)
#> Permutation: free
#> Number of permutations: 999
#> 
#> adonis2(formula = dist_matrix ~ organism + flooded + path_conc + experiment, data = metadata, permutations = 999, method = "bray")
#>             Df SumOfSqs      R2      F Pr(>F)    
#> organism     3    1.252 0.02381 1.3885  0.061 .  
#> flooded      1    1.848 0.03514 6.1478  0.001 ***
#> path_conc    1    0.276 0.00524 0.9170  0.500    
#> experiment   1    2.616 0.04974 8.7009  0.001 ***
#> Residual   155   46.599 0.88606                  
#> Total      161   52.592 1.00000                  
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1


adonis2_org <- adonis2(dist_matrix ~ organism, 
                         data = metadata, 
                         permutations = 999,
                         method = "bray")

print(adonis2_org)
#> Permutation test for adonis under reduced model
#> Terms added sequentially (first to last)
#> Permutation: free
#> Number of permutations: 999
#> 
#> adonis2(formula = dist_matrix ~ organism, data = metadata, permutations = 999, method = "bray")
#>           Df SumOfSqs      R2      F Pr(>F)
#> organism   3    1.252 0.02381 1.2847  0.114
#> Residual 158   51.339 0.97619              
#> Total    161   52.592 1.00000


adonis2_flooded <- adonis2(dist_matrix ~ flooded, 
                         data = metadata, 
                         permutations = 999,
                         method = "bray")

print(adonis2_flooded)
#> Permutation test for adonis under reduced model
#> Terms added sequentially (first to last)
#> Permutation: free
#> Number of permutations: 999
#> 
#> adonis2(formula = dist_matrix ~ flooded, data = metadata, permutations = 999, method = "bray")
#>           Df SumOfSqs      R2      F Pr(>F)    
#> flooded    1    1.849 0.03516 5.8301  0.001 ***
#> Residual 160   50.743 0.96484                  
#> Total    161   52.592 1.00000                  
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

adonis2_path_conc <- adonis2(dist_matrix ~ path_conc, 
                         data = metadata, 
                         permutations = 999,
                         method = "bray")

print(adonis2_path_conc)
#> Permutation test for adonis under reduced model
#> Terms added sequentially (first to last)
#> Permutation: free
#> Number of permutations: 999
#> 
#> adonis2(formula = dist_matrix ~ path_conc, data = metadata, permutations = 999, method = "bray")
#>            Df SumOfSqs      R2      F Pr(>F)
#> path_conc   1    0.308 0.00586 0.9434  0.487
#> Residual  160   52.283 0.99414              
#> Total     161   52.592 1.00000

adonis2_experiment <- adonis2(dist_matrix ~ experiment, 
                         data = metadata, 
                         permutations = 999,
                         method = "bray")

print(adonis2_experiment)
#> Permutation test for adonis under reduced model
#> Terms added sequentially (first to last)
#> Permutation: free
#> Number of permutations: 999
#> 
#> adonis2(formula = dist_matrix ~ experiment, data = metadata, permutations = 999, method = "bray")
#>             Df SumOfSqs      R2      F Pr(>F)    
#> experiment   1    2.615 0.04972 8.3709  0.001 ***
#> Residual   160   49.977 0.95028                  
#> Total      161   52.592 1.00000                  
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Let’s make new taxmap object in preparation to make heattree examining abundance and diversity of taxa in at least 5 samples

#Make sure our factors are input properly as factors
#Make sure our factors are input properly as factors
metadata$path_conc <- as.factor(metadata$path_conc)
metadata$path_conc<- factor(metadata$path_conc, levels = c("0", "1", "100"))

metadata$organism <- as.factor(metadata$organism)
metadata$organism<- factor(metadata$organism, levels = c("Control", "Cin", "Cry", "Plu"))

metadata$flooded <- as.factor(metadata$flooded)
metadata$flooded <- factor(metadata$flooded, levels = c("TRUE", "FALSE"))

metadata$experiment <- as.factor(metadata$experiment)
metadata$experiment<- factor(metadata$experiment, levels = c("1", "2"))

abundance$is_low_abund <- rowSums(abundance[, metadata$sample_name]) < 5
obj2 <- parse_tax_data(abundance, class_cols = 'dada2_tax', class_sep = ';',
                      class_regex = '^(.+)--(.+)--(.+)$',
                      class_key = c(taxon = 'taxon_name', boot = 'info', rank = 'taxon_rank'))
names(obj2$data) <- c('abund', 'score')
obj2 <- transmute_obs(obj2, 'score', sequence = sequence[input_index], boot = boot, rank = rank)
print(obj2)
#> <Taxmap>
#>   1572 taxa: aab. Eukaryota, aac. Fungi ... cim. NA
#>   1572 edges: NA->aab, aab->aac, aab->aad ... cik->cil, cil->cim
#>   2 data sets:
#>     abund:
#>       # A tibble: 2,697 × 179
#>         taxon_id sequence   dada2_tax is_inoculum inoculum    S1   S10
#>         <chr>    <chr>      <chr>     <lgl>       <chr>    <int> <int>
#>       1 bfz      AAAAAGTCG… Eukaryot… FALSE       <NA>         0     0
#>       2 bga      AAAAAGTCG… Eukaryot… FALSE       <NA>         0     0
#>       3 bgb      AAAAAGTCG… Eukaryot… FALSE       <NA>         0     0
#>       # ℹ 2,694 more rows
#>       # ℹ 172 more variables: S100 <int>, S101 <int>, S102 <int>,
#>       #   S103 <int>, S104 <int>, S105 <int>, S106 <int>, S107 <int>,
#>       #   S108 <int>, S109 <int>, …
#>     score:
#>       # A tibble: 23,356 × 4
#>         taxon_id sequence                                  boot  rank 
#>         <chr>    <chr>                                     <chr> <chr>
#>       1 aab      AAAAAGTCGTAACAAGGTAACCGTAGGTGAACCTGCGGTT… 100   Doma…
#>       2 aac      AAAAAGTCGTAACAAGGTAACCGTAGGTGAACCTGCGGTT… 100   King…
#>       3 aae      AAAAAGTCGTAACAAGGTAACCGTAGGTGAACCTGCGGTT… 100   Phyl…
#>       # ℹ 23,353 more rows
#>   0 functions:

obj2$data$tax_abund <- calc_taxon_abund(obj2, data = "abund", cols = metadata$sample_name)
#> Summing per-taxon counts from 162 columns for 1572 taxa

obj2$data$abund_prop <- calc_obs_props(obj2, data = "abund", cols = metadata$sample_name, groups = rep("tax_prop", nrow(metadata)))
#> Calculating proportions from counts for 162 columns in 1 groups for 2697 observations.
obj2$data$tax_prop <- calc_taxon_abund(obj2, data = "abund_prop", cols = "tax_prop")
#> Summing per-taxon counts from 1 columns for 1572 taxa
obj2$data$abund_prop <- NULL

obj_subset <- obj2 %>%
  filter_taxa(taxon_ranks == "Species", supertaxa = TRUE, reassign_obs = c(tax_abund = FALSE)) 
min_bootstrap <- 0

obj_subset$data$score$boot <- as.numeric(obj_subset$data$score$boot)
max_boot <- obj_subset$data$score %>%
  group_by(taxon_id) %>%
  summarise(max = max(boot))
max_boot <- setNames(max_boot$max, max_boot$taxon_id)
obj_subset <- filter_taxa(obj_subset, max_boot[taxon_ids] >= min_bootstrap | taxon_ranks %in% c("ASV", "Reference"), reassign_obs = c(abund = TRUE, score = FALSE))

Let’s make differential abundance plots for our combined dataset-first let’s look at diff. abundance of taxa by inoculum concentration

#> [1] 1.9483e-11 1.0000e+00
#> [1] -9.825868 29.147210

Let’s make differential abundance plots for our combined dataset-first let’s look at diff. abundance of taxa by whether plots were flooded or not

#> [1] 3.732973e-21 9.956213e-01
#> [1] -24.79712   5.11938

Let’s make differential abundance plots for our combined dataset-first let’s look at diff. abundance of taxa by inoculum source

#> [1] 2.67702e-46 1.00000e+00
#> [1] -42.21227  41.73490

Let’s make differential abundance plots for our combined dataset-first let’s look at diff. abundance of taxa by whether pots were in trial 1 or 2

#> [1] 1.147702e-192  9.859411e-01
#> [1] -29.43794  25.78321

Let’s try to facet these plots.

#> quartz_off_screen 
#>                 2

Let’s finally make an exploratory heat tree to look at community composition across all samples, just retaining ASVs present at least 10 times

Overall community plot

obj_subset <- obj2 %>%
  filter_taxa(taxon_ranks == "Species", supertaxa = TRUE, reassign_obs = c(tax_abund = FALSE)) 
min_bootstrap <- 60

obj_subset$data$score$boot <- as.numeric(obj_subset$data$score$boot)
max_boot <- obj_subset$data$score %>%
  group_by(taxon_id) %>%
  summarise(max = max(boot))
max_boot <- setNames(max_boot$max, max_boot$taxon_id)
obj_subset <- filter_taxa(obj_subset, max_boot[taxon_ids] >= min_bootstrap | taxon_ranks %in% c("ASV", "Reference"), reassign_obs = c(abund = TRUE, score = FALSE))

obj_subset$data$asv_prop <- calc_obs_props(obj_subset, 'abund', cols = metadata$sample_name)
#> Calculating proportions from counts for 162 columns for 2697 observations.
obj_subset$data$tax_abund <- calc_taxon_abund(obj_subset, 'abund', cols = metadata$sample_name)
#> Summing per-taxon counts from 162 columns for 1135 taxa
obj_subset$data$tax_prop <- calc_taxon_abund(obj_subset, 'asv_prop', cols = metadata$sample_name)
#> Summing per-taxon counts from 162 columns for 1135 taxa
obj_subset$data$tax_data <- calc_n_samples(obj_subset, 'tax_prop', cols = metadata$sample_name[metadata$sample_type == 'Sample'])
#> Calculating number of samples with a value greater than 0 for 162 columns for 1135 observations
obj_subset$data$tax_data$mean_prop <- rowMeans(obj_subset$data$tax_prop[, metadata$sample_name])

# Calculate the relative standard deviation for each taxon as a measure of how consistently it was found.
rsd <- function(x, na.rm = FALSE) {sd(x, na.rm = na.rm) / mean(x, na.rm = na.rm)}
obj_subset$data$tax_data$rel_stand_dev <- map_dbl(1:nrow(obj_subset$data$tax_abund), function(i) {
  rsd(unlist(obj_subset$data$tax_abund[i, metadata$sample_name]), na.rm = TRUE)
})
obj_subset
#> <Taxmap>
#>   1135 taxa: aab. Eukaryota ... cim. NA
#>   1135 edges: NA->aab, aab->aac, aab->aad ... cik->cil, cil->cim
#>   6 data sets:
#>     abund:
#>       # A tibble: 2,697 × 179
#>         taxon_id sequence   dada2_tax is_inoculum inoculum    S1   S10
#>         <chr>    <chr>      <chr>     <lgl>       <chr>    <int> <int>
#>       1 bfz      AAAAAGTCG… Eukaryot… FALSE       <NA>         0     0
#>       2 bga      AAAAAGTCG… Eukaryot… FALSE       <NA>         0     0
#>       3 bgb      AAAAAGTCG… Eukaryot… FALSE       <NA>         0     0
#>       # ℹ 2,694 more rows
#>       # ℹ 172 more variables: S100 <int>, S101 <int>, S102 <int>,
#>       #   S103 <int>, S104 <int>, S105 <int>, S106 <int>, S107 <int>,
#>       #   S108 <int>, S109 <int>, …
#>     score:
#>       # A tibble: 22,066 × 4
#>         taxon_id sequence                                   boot rank 
#>         <chr>    <chr>                                     <dbl> <chr>
#>       1 aab      AAAAAGTCGTAACAAGGTAACCGTAGGTGAACCTGCGGTT…   100 Doma…
#>       2 aac      AAAAAGTCGTAACAAGGTAACCGTAGGTGAACCTGCGGTT…   100 King…
#>       3 aae      AAAAAGTCGTAACAAGGTAACCGTAGGTGAACCTGCGGTT…   100 Phyl…
#>       # ℹ 22,063 more rows
#>     tax_abund:
#>       # A tibble: 1,135 × 163
#>         taxon_id    S1   S10  S100  S101  S102  S103  S104  S105  S106
#>         <chr>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#>       1 aab      56792 35094 50052 42596 85519 70268 52090 54075 51693
#>       2 aac      27698 12210 11111 13034 18437 19077 10116 11106 12154
#>       3 aad      29094 22884 38941 29562 67082 51191 41974 42969 39539
#>       # ℹ 1,132 more rows
#>       # ℹ 153 more variables: S107 <dbl>, S108 <dbl>, S109 <dbl>,
#>       #   S11 <dbl>, S110 <dbl>, S111 <dbl>, S112 <dbl>, S113 <dbl>,
#>       #   S114 <dbl>, S115 <dbl>, …
#>     tax_prop:
#>       # A tibble: 1,135 × 163
#>         taxon_id    S1   S10  S100  S101  S102  S103  S104  S105  S106
#>         <chr>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#>       1 aab      1     1     1     1     1     1     1     1     1    
#>       2 aac      0.488 0.348 0.222 0.306 0.216 0.271 0.194 0.205 0.235
#>       3 aad      0.512 0.652 0.778 0.694 0.784 0.729 0.806 0.795 0.765
#>       # ℹ 1,132 more rows
#>       # ℹ 153 more variables: S107 <dbl>, S108 <dbl>, S109 <dbl>,
#>       #   S11 <dbl>, S110 <dbl>, S111 <dbl>, S112 <dbl>, S113 <dbl>,
#>       #   S114 <dbl>, S115 <dbl>, …
#>     asv_prop:
#>       # A tibble: 2,697 × 163
#>         taxon_id    S1   S10  S100  S101  S102  S103  S104  S105  S106
#>         <chr>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#>       1 bfz          0     0     0     0     0     0     0     0     0
#>       2 bga          0     0     0     0     0     0     0     0     0
#>       3 bgb          0     0     0     0     0     0     0     0     0
#>       # ℹ 2,694 more rows
#>       # ℹ 153 more variables: S107 <dbl>, S108 <dbl>, S109 <dbl>,
#>       #   S11 <dbl>, S110 <dbl>, S111 <dbl>, S112 <dbl>, S113 <dbl>,
#>       #   S114 <dbl>, S115 <dbl>, …
#>     tax_data:
#>       # A tibble: 1,135 × 4
#>         taxon_id n_samples mean_prop rel_stand_dev
#>         <chr>        <int>     <dbl>         <dbl>
#>       1 aab            162     1             0.444
#>       2 aac            162     0.416         0.652
#>       3 aad            162     0.584         0.711
#>       # ℹ 1,132 more rows
#>   0 functions:

set.seed(1000)
obj_subset %>%
  filter_taxa(! is_stem) %>%
  filter_taxa(n_samples >= 10, supertaxa = TRUE, reassign_obs = FALSE) %>%
  filter_taxa(! grepl(x = taxon_names, "_sp$"), reassign_obs = FALSE) %>%
  filter_taxa(! grepl(x = taxon_names, "incertae_sedis", ignore.case = TRUE), reassign_obs = FALSE) %>%
  filter_taxa(! grepl(x = taxon_names, "NA", ignore.case = TRUE), reassign_obs = FALSE) %>%
  remove_redundant_names() %>%
  heat_tree(node_size = mean_prop,
            edge_size = n_samples,
            node_color = ifelse(is.na(rel_stand_dev), 0, rel_stand_dev),
            #node_color_range = c("#aaaaaa", "#8da0cb", "#66c2a5", "#a6d854", "#fc8d62", "red"),
            node_label = taxon_names,
            node_size_range = c(0.008, 0.025),
            node_label_size_range = c(0.012, 0.018),
            edge_label_size_range = c(0.010, 0.013),
            node_size_interval = c(0, 1),
            edge_size_range = c(0.001, 0.008),
            #layout = "da", initial_layout = "re",
            node_color_axis_label = "Relative standard deviation",
            node_size_axis_label = "Mean proportion of reads",
            edge_size_axis_label = "Number of samples",
            node_color_digits = 2,
            node_size_digits = 2,
            edge_color_digits = 2,
            edge_size_digits = 2,
            #aspect_ratio = 1.618,
            output_file = file.path('standard_workflow/combined_figures', '/heattree_mostabund_taxa.pdf'))

sessioninfo::session_info()
#> ─ Session info ───────────────────────────────────────────────────────────────
#>  setting  value
#>  version  R version 4.3.2 (2023-10-31)
#>  os       macOS Ventura 13.2.1
#>  system   aarch64, darwin20
#>  ui       X11
#>  language (EN)
#>  collate  en_US.UTF-8
#>  ctype    en_US.UTF-8
#>  tz       America/Los_Angeles
#>  date     2024-11-22
#>  pandoc   3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)
#> 
#> ─ Packages ───────────────────────────────────────────────────────────────────
#>  package              * version    date (UTC) lib source
#>  abind                  1.4-5      2016-07-21 [1] CRAN (R 4.3.0)
#>  ade4                   1.7-22     2023-02-06 [1] CRAN (R 4.3.0)
#>  agricolae              1.3-7      2023-10-22 [1] CRAN (R 4.3.1)
#>  AlgDesign              1.2.1      2022-05-25 [1] CRAN (R 4.3.0)
#>  ape                    5.8        2024-04-11 [1] CRAN (R 4.3.1)
#>  askpass                1.2.0      2023-09-03 [1] CRAN (R 4.3.0)
#>  backports              1.5.0      2024-05-23 [1] CRAN (R 4.3.3)
#>  Biobase                2.62.0     2023-10-26 [1] Bioconductor
#>  BiocGenerics         * 0.48.1     2023-11-02 [1] Bioconductor
#>  BiocParallel           1.36.0     2023-10-26 [1] Bioconductor
#>  biomformat             1.30.0     2023-10-26 [1] Bioconductor
#>  Biostrings           * 2.70.3     2024-04-03 [1] bioc_xgit (@c213e35)
#>  bit                    4.5.0      2024-09-20 [1] CRAN (R 4.3.3)
#>  bit64                  4.5.2      2024-09-22 [1] CRAN (R 4.3.3)
#>  bitops                 1.0-8      2024-07-29 [1] CRAN (R 4.3.3)
#>  broom                  1.0.6      2024-05-17 [1] CRAN (R 4.3.3)
#>  bslib                  0.8.0      2024-07-29 [1] CRAN (R 4.3.3)
#>  cachem                 1.1.0      2024-05-16 [1] CRAN (R 4.3.2)
#>  car                    3.1-2      2023-03-30 [1] CRAN (R 4.3.0)
#>  carData                3.0-5      2022-01-06 [1] CRAN (R 4.3.0)
#>  cli                    3.6.3      2024-06-21 [1] CRAN (R 4.3.2)
#>  cluster                2.1.6      2023-12-01 [1] CRAN (R 4.3.1)
#>  codetools              0.2-20     2024-03-31 [1] CRAN (R 4.3.1)
#>  colorspace             2.1-1      2024-07-26 [1] CRAN (R 4.3.3)
#>  cowplot                1.1.3      2024-01-22 [1] CRAN (R 4.3.1)
#>  crayon                 1.5.3      2024-06-20 [1] CRAN (R 4.3.3)
#>  data.table           * 1.15.4     2024-03-30 [1] CRAN (R 4.3.1)
#>  decontam             * 1.22.0     2023-10-26 [1] Bioconductor
#>  DelayedArray           0.28.0     2023-11-06 [1] Bioconductor
#>  DESeq2                 1.42.1     2024-03-09 [1] Bioconductor 3.18 (R 4.3.3)
#>  digest                 0.6.37     2024-08-19 [1] CRAN (R 4.3.3)
#>  dplyr                * 1.1.4      2023-11-17 [1] CRAN (R 4.3.1)
#>  evaluate               1.0.1      2024-10-10 [1] CRAN (R 4.3.3)
#>  fansi                  1.0.6      2023-12-08 [1] CRAN (R 4.3.1)
#>  farver                 2.1.2      2024-05-13 [1] CRAN (R 4.3.3)
#>  fastmap                1.2.0      2024-05-15 [1] CRAN (R 4.3.2)
#>  forcats              * 1.0.0      2023-01-29 [1] CRAN (R 4.3.0)
#>  foreach                1.5.2      2022-02-02 [1] CRAN (R 4.3.0)
#>  furrr                * 0.3.1      2022-08-15 [1] CRAN (R 4.3.0)
#>  future               * 1.34.0     2024-07-29 [1] CRAN (R 4.3.3)
#>  generics               0.1.3      2022-07-05 [1] CRAN (R 4.3.0)
#>  GenomeInfoDb         * 1.38.8     2024-03-16 [1] Bioconductor 3.18 (R 4.3.3)
#>  GenomeInfoDbData       1.2.11     2023-11-14 [1] Bioconductor
#>  GenomicRanges          1.54.1     2023-10-30 [1] Bioconductor
#>  ggfittext              0.10.2     2024-02-01 [1] CRAN (R 4.3.1)
#>  ggplot2              * 3.5.1      2024-04-23 [1] CRAN (R 4.3.1)
#>  ggpubr                 0.6.0      2023-02-10 [1] CRAN (R 4.3.0)
#>  ggsignif               0.6.4      2022-10-13 [1] CRAN (R 4.3.0)
#>  globals                0.16.3     2024-03-08 [1] CRAN (R 4.3.1)
#>  glue                   1.8.0      2024-09-30 [1] CRAN (R 4.3.3)
#>  gridExtra              2.3        2017-09-09 [1] CRAN (R 4.3.0)
#>  gtable                 0.3.5      2024-04-22 [1] CRAN (R 4.3.1)
#>  highr                  0.11       2024-05-26 [1] CRAN (R 4.3.3)
#>  hms                    1.1.3      2023-03-21 [1] CRAN (R 4.3.0)
#>  htmltools              0.5.8.1    2024-04-04 [1] CRAN (R 4.3.1)
#>  igraph                 2.0.3      2024-03-13 [1] CRAN (R 4.3.1)
#>  IRanges              * 2.36.0     2023-10-26 [1] Bioconductor
#>  iterators              1.0.14     2022-02-05 [1] CRAN (R 4.3.0)
#>  jquerylib              0.1.4      2021-04-26 [1] CRAN (R 4.3.0)
#>  jsonlite               1.8.8      2023-12-04 [1] CRAN (R 4.3.1)
#>  knitr                  1.48       2024-07-07 [1] CRAN (R 4.3.3)
#>  labeling               0.4.3      2023-08-29 [1] CRAN (R 4.3.0)
#>  lattice              * 0.22-6     2024-03-20 [1] CRAN (R 4.3.1)
#>  lazyeval               0.2.2      2019-03-15 [1] CRAN (R 4.3.0)
#>  lifecycle              1.0.4      2023-11-07 [1] CRAN (R 4.3.1)
#>  listenv                0.9.1      2024-01-29 [1] CRAN (R 4.3.1)
#>  locfit                 1.5-9.10   2024-06-24 [1] CRAN (R 4.3.3)
#>  magick               * 2.8.4      2024-07-14 [1] CRAN (R 4.3.3)
#>  magrittr               2.0.3      2022-03-30 [1] CRAN (R 4.3.0)
#>  MASS                   7.3-60.0.1 2024-01-13 [1] CRAN (R 4.3.1)
#>  Matrix                 1.6-5      2024-01-11 [1] CRAN (R 4.3.1)
#>  MatrixGenerics         1.14.0     2023-10-26 [1] Bioconductor
#>  matrixStats            1.3.0      2024-04-11 [1] CRAN (R 4.3.1)
#>  metacoder            * 0.3.7      2024-02-20 [1] CRAN (R 4.3.1)
#>  mgcv                   1.9-1      2023-12-21 [1] CRAN (R 4.3.1)
#>  multtest               2.58.0     2023-10-26 [1] Bioconductor
#>  munsell                0.5.1      2024-04-01 [1] CRAN (R 4.3.1)
#>  nlme                   3.1-165    2024-06-06 [1] CRAN (R 4.3.3)
#>  parallelly             1.38.0     2024-07-27 [1] CRAN (R 4.3.3)
#>  pdftools             * 3.4.1      2024-09-20 [1] CRAN (R 4.3.3)
#>  permute              * 0.9-7      2022-01-27 [1] CRAN (R 4.3.0)
#>  phyloseq             * 1.46.0     2024-04-03 [1] bioc_xgit (@7320133)
#>  pillar                 1.9.0      2023-03-22 [1] CRAN (R 4.3.0)
#>  pkgconfig              2.0.3      2019-09-22 [1] CRAN (R 4.3.0)
#>  plyr                   1.8.9      2023-10-02 [1] CRAN (R 4.3.1)
#>  purrr                * 1.0.2      2023-08-10 [1] CRAN (R 4.3.0)
#>  qpdf                   1.3.4      2024-10-04 [1] CRAN (R 4.3.3)
#>  R6                     2.5.1      2021-08-19 [1] CRAN (R 4.3.0)
#>  ragg                   1.3.2      2024-05-15 [1] CRAN (R 4.3.3)
#>  Rcpp                   1.0.13     2024-07-17 [1] CRAN (R 4.3.2)
#>  RCurl                  1.98-1.16  2024-07-11 [1] CRAN (R 4.3.3)
#>  readr                * 2.1.5      2024-01-10 [1] CRAN (R 4.3.1)
#>  reshape2               1.4.4      2020-04-09 [1] CRAN (R 4.3.0)
#>  rhdf5                  2.46.1     2023-12-02 [1] Bioconductor 3.18 (R 4.3.2)
#>  rhdf5filters           1.14.1     2023-11-06 [1] Bioconductor
#>  Rhdf5lib               1.24.2     2024-02-10 [1] Bioconductor 3.18 (R 4.3.2)
#>  rlang                  1.1.4      2024-06-04 [1] CRAN (R 4.3.2)
#>  rmarkdown              2.27       2024-05-17 [1] CRAN (R 4.3.3)
#>  rstatix                0.7.2      2023-02-01 [1] CRAN (R 4.3.0)
#>  rstudioapi             0.16.0     2024-03-24 [1] CRAN (R 4.3.1)
#>  S4Arrays               1.2.1      2024-03-05 [1] Bioconductor 3.18 (R 4.3.2)
#>  S4Vectors            * 0.40.2     2023-11-25 [1] Bioconductor 3.18 (R 4.3.2)
#>  sass                   0.4.9      2024-03-15 [1] CRAN (R 4.3.1)
#>  scales                 1.3.0      2023-11-28 [1] CRAN (R 4.3.1)
#>  sessioninfo            1.2.2      2021-12-06 [1] CRAN (R 4.3.0)
#>  SparseArray            1.2.4      2024-02-10 [1] Bioconductor 3.18 (R 4.3.2)
#>  stringi                1.8.4      2024-05-06 [1] CRAN (R 4.3.2)
#>  stringr              * 1.5.1      2023-11-14 [1] CRAN (R 4.3.1)
#>  SummarizedExperiment   1.32.0     2023-11-06 [1] Bioconductor
#>  survival               3.7-0      2024-06-05 [1] CRAN (R 4.3.3)
#>  svglite                2.1.3      2023-12-08 [1] CRAN (R 4.3.1)
#>  systemfonts            1.1.0      2024-05-15 [1] CRAN (R 4.3.3)
#>  textshaping            0.4.0      2024-05-24 [1] CRAN (R 4.3.3)
#>  tibble                 3.2.1      2023-03-20 [1] CRAN (R 4.3.0)
#>  tidyr                * 1.3.1      2024-01-24 [1] CRAN (R 4.3.1)
#>  tidyselect             1.2.1      2024-03-11 [1] CRAN (R 4.3.1)
#>  tzdb                   0.4.0      2023-05-12 [1] CRAN (R 4.3.0)
#>  utf8                   1.2.4      2023-10-22 [1] CRAN (R 4.3.1)
#>  vctrs                  0.6.5      2023-12-01 [1] CRAN (R 4.3.1)
#>  vegan                * 2.6-6.1    2024-05-21 [1] CRAN (R 4.3.3)
#>  viridis                0.6.5      2024-01-29 [1] CRAN (R 4.3.1)
#>  viridisLite            0.4.2      2023-05-02 [1] CRAN (R 4.3.0)
#>  vroom                  1.6.5      2023-12-05 [1] CRAN (R 4.3.1)
#>  withr                  3.0.1      2024-07-31 [1] CRAN (R 4.3.2)
#>  xfun                   0.48       2024-10-03 [1] CRAN (R 4.3.3)
#>  XVector              * 0.42.0     2023-10-26 [1] Bioconductor
#>  yaml                   2.3.10     2024-07-26 [1] CRAN (R 4.3.3)
#>  zlibbioc               1.48.2     2024-03-19 [1] Bioconductor 3.18 (R 4.3.3)
#> 
#>  [1] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library
#> 
#> ──────────────────────────────────────────────────────────────────────────────